2 @message |
RecursionError: maximum recursion depth exceeded
LOG DETAILS:
2025-07-01 05:44:06.007
2025-07-01 05:44:06.017 act = <firebird.qa.plugin.Action object at [hex]>
2025-07-01 05:44:06.025
2025-07-01 05:44:06.032 @pytest.mark.version('>=3')
2025-07-01 05:44:06.039 def test_1(act: Action):
2025-07-01 05:44:06.049 act.expected_stdout = expected_stdout
2025-07-01 05:44:06.055 act.execute()
2025-07-01 05:44:06.063 > assert act.clean_stdout == act.clean_expected_stdout
2025-07-01 05:44:06.074
2025-07-01 05:44:06.083 tests/bugs/core_2969_test.py:1211:
2025-07-01 05:44:06.090 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:06.101
2025-07-01 05:44:06.110 ops = ('==',), results = (False,)
2025-07-01 05:44:06.116 expls = ('%(py2)s\n{%(py2)s = %(py0)s.clean_stdout\n} == %(py6)s\n{%(py6)s = %(py4)s.clean_expected_stdout\n}',)
2025-07-01 05:44:06.124 each_obj = ('WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy')
2025-07-01 05:44:06.135
2025-07-01 05:44:06.146 def _call_reprcompare(
2025-07-01 05:44:06.155 ops: Sequence[str],
2025-07-01 05:44:06.163 results: Sequence[bool],
2025-07-01 05:44:06.173 expls: Sequence[str],
2025-07-01 05:44:06.185 each_obj: Sequence[object],
2025-07-01 05:44:06.196 ) -> str:
2025-07-01 05:44:06.208 for i, res, expl in zip(range(len(ops)), results, expls):
2025-07-01 05:44:06.219 try:
2025-07-01 05:44:06.227 done = not res
2025-07-01 05:44:06.234 except Exception:
2025-07-01 05:44:06.241 done = True
2025-07-01 05:44:06.249 if done:
2025-07-01 05:44:06.256 break
2025-07-01 05:44:06.263 if util._reprcompare is not None:
2025-07-01 05:44:06.274 > custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
2025-07-01 05:44:06.282
2025-07-01 05:44:06.287 ../lib/python3.11/site-packages/_pytest/assertion/rewrite.py:499:
2025-07-01 05:44:06.297 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:06.307
2025-07-01 05:44:06.316 op = '=='
2025-07-01 05:44:06.324 left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
2025-07-01 05:44:06.330 right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
2025-07-01 05:44:06.335
2025-07-01 05:44:06.341 def callbinrepr(op, left: object, right: object) -> Optional[str]:
2025-07-01 05:44:06.354 """Call the pytest_assertrepr_compare hook and prepare the result.
2025-07-01 05:44:06.362
2025-07-01 05:44:06.369 This uses the first result from the hook and then ensures the
2025-07-01 05:44:06.375 following:
2025-07-01 05:44:06.381 * Overly verbose explanations are truncated unless configured otherwise
2025-07-01 05:44:06.392 (eg. if running in verbose mode).
2025-07-01 05:44:06.403 * Embedded newlines are escaped to help util.format_explanation()
2025-07-01 05:44:06.415 later.
2025-07-01 05:44:06.424 * If the rewrite mode is used embedded %-characters are replaced
2025-07-01 05:44:06.431 to protect later % formatting.
2025-07-01 05:44:06.439
2025-07-01 05:44:06.445 The result can be formatted by util.format_explanation() for
2025-07-01 05:44:06.452 pretty printing.
2025-07-01 05:44:06.457 """
2025-07-01 05:44:06.463 > hook_result = ihook.pytest_assertrepr_compare(
2025-07-01 05:44:06.470 config=item.config, op=op, left=left, right=right
2025-07-01 05:44:06.475 )
2025-07-01 05:44:06.480
2025-07-01 05:44:06.486 ../lib/python3.11/site-packages/_pytest/assertion/__init__.py:141:
2025-07-01 05:44:06.496 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:06.507
2025-07-01 05:44:06.518 self = <HookCaller 'pytest_assertrepr_compare'>
2025-07-01 05:44:06.529 kwargs = {'config': <_pytest.config.Config object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY C...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
2025-07-01 05:44:06.537 firstresult = False
2025-07-01 05:44:06.543
2025-07-01 05:44:06.550 def __call__(self, **kwargs: object) -> Any:
2025-07-01 05:44:06.555 """Call the hook.
2025-07-01 05:44:06.562
2025-07-01 05:44:06.575 Only accepts keyword arguments, which should match the hook
2025-07-01 05:44:06.585 specification.
2025-07-01 05:44:06.593
2025-07-01 05:44:06.599 Returns the result(s) of calling all registered plugins, see
2025-07-01 05:44:06.607 :ref:`calling`.
2025-07-01 05:44:06.618 """
2025-07-01 05:44:06.627 assert (
2025-07-01 05:44:06.633 not self.is_historic()
2025-07-01 05:44:06.639 ), "Cannot directly call a historic hook - use call_historic instead."
2025-07-01 05:44:06.647 self._verify_all_args_are_provided(kwargs)
2025-07-01 05:44:06.654 firstresult = self.spec.opts.get("firstresult", False) if self.spec else False
2025-07-01 05:44:06.663 > return self._hookexec(self.name, self._hookimpls, kwargs, firstresult)
2025-07-01 05:44:06.673
2025-07-01 05:44:06.681 ../lib/python3.11/site-packages/pluggy/_hooks.py:493:
2025-07-01 05:44:06.692 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:06.701
2025-07-01 05:44:06.708 self = <_pytest.config.PytestPluginManager object at [hex]>
2025-07-01 05:44:06.715 hook_name = 'pytest_assertrepr_compare'
2025-07-01 05:44:06.722 methods = [<HookImpl plugin_name='assertion', plugin=<module '_pytest.assertion' from '/opt/distr/venv/lib/python3.11/site-packa...in_name='firebird', plugin=<module 'firebird.qa.plugin' from '/opt/distr/venv/firebird-qa/src/firebird/qa/plugin.py'>>]
2025-07-01 05:44:06.731 kwargs = {'config': <_pytest.config.Config object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY C...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
2025-07-01 05:44:06.739 firstresult = False
2025-07-01 05:44:06.750
2025-07-01 05:44:06.758 def _hookexec(
2025-07-01 05:44:06.765 self,
2025-07-01 05:44:06.771 hook_name: str,
2025-07-01 05:44:06.777 methods: Sequence[HookImpl],
2025-07-01 05:44:06.783 kwargs: Mapping[str, object],
2025-07-01 05:44:06.789 firstresult: bool,
2025-07-01 05:44:06.800 ) -> object | list[object]:
2025-07-01 05:44:06.811 # called from all hookcaller instances.
2025-07-01 05:44:06.822 # enable_tracing will set its own wrapping function at self._inner_hookexec
2025-07-01 05:44:06.831 > return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
2025-07-01 05:44:06.838
2025-07-01 05:44:06.847 ../lib/python3.11/site-packages/pluggy/_manager.py:115:
2025-07-01 05:44:06.858 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:06.867
2025-07-01 05:44:06.875 config = <_pytest.config.Config object at [hex]>, op = '=='
2025-07-01 05:44:06.883 left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
2025-07-01 05:44:06.891 right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
2025-07-01 05:44:06.896
2025-07-01 05:44:06.902 def pytest_assertrepr_compare(config: Config, op: str, left: object, right: object) -> Optional[List[str]]:
2025-07-01 05:44:06.909 """Returns explanation for comparisons in failing assert expressions.
2025-07-01 05:44:06.914
2025-07-01 05:44:06.920 If both objects are `str`, uses `difflib.ndiff` to provide explanation.
2025-07-01 05:44:06.927 """
2025-07-01 05:44:06.937 if isinstance(left, str) and isinstance(right, str) and op == "==":
2025-07-01 05:44:06.945 # 16.11.2023, pzotov: we have to put empty string at the beginning of each comparing lists.
2025-07-01 05:44:06.955 # Otherwise first diff will be at the same line as 'assert' phrase, which causes readability be poor.
2025-07-01 05:44:06.969 #
2025-07-01 05:44:06.977 left_lines = ['']
2025-07-01 05:44:06.985 left_lines.extend(left.splitlines())
2025-07-01 05:44:06.991 right_lines = ['']
2025-07-01 05:44:06.996 right_lines.extend(right.splitlines())
2025-07-01 05:44:07.001
2025-07-01 05:44:07.005 # 16.11.2023, pzotov
2025-07-01 05:44:07.010 # ndiff output must be interpreted as following:
2025-07-01 05:44:07.014 # * "E - <some text>" ==> MISSED line (it was in EXPECTED text but absent in actual one).
2025-07-01 05:44:07.019 # * "E + <some_text>" ==> EXCESSIVE line (it is not in EXPECTED text but did appear in actual).
2025-07-01 05:44:07.023 # But for QA-purposes, this output must answer the question:
2025-07-01 05:44:07.028 # "what must be changed in ACTUAL output so that it became equal to EXPECTED"
2025-07-01 05:44:07.033 # (i.e. how to "REVERT" actual back to expected).
2025-07-01 05:44:07.039 # In order to see such result, we have to specify 'right_lines' to the 1st argument that is passed to ndiff().
2025-07-01 05:44:07.044 # ::: NB :::
2025-07-01 05:44:07.048 # We assume that all tests are written so that ACTUAL output is left side in 'assert' statement and EXPECTED
2025-07-01 05:44:07.054 # is right side, e.g: assert act.clean_stdout == act.clean_expected_stdout
2025-07-01 05:44:07.066 # This requirement is CRUCIAL if we use ndiff() instead of default pytest comparison method!
2025-07-01 05:44:07.078 #
2025-07-01 05:44:07.087 > return list(ndiff(right_lines, left_lines))
2025-07-01 05:44:07.100
2025-07-01 05:44:07.113 src/firebird/qa/plugin.py:608:
2025-07-01 05:44:07.124 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:07.134
2025-07-01 05:44:07.143 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:07.155 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:07.165 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:07.171
2025-07-01 05:44:07.176 def compare(self, a, b):
2025-07-01 05:44:07.180 r"""
2025-07-01 05:44:07.185 Compare two sequences of lines; generate the resulting delta.
2025-07-01 05:44:07.189
2025-07-01 05:44:07.195 Each sequence must contain individual single-line strings ending with
2025-07-01 05:44:07.201 newlines. Such sequences can be obtained from the `readlines()` method
2025-07-01 05:44:07.207 of file-like objects. The delta generated also consists of newline-
2025-07-01 05:44:07.213 terminated strings, ready to be printed as-is via the writelines()
2025-07-01 05:44:07.218 method of a file-like object.
2025-07-01 05:44:07.229
2025-07-01 05:44:07.241 Example:
2025-07-01 05:44:07.249
2025-07-01 05:44:07.261 >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
2025-07-01 05:44:07.273 ... 'ore\ntree\nemu\n'.splitlines(True))),
2025-07-01 05:44:07.284 ... end="")
2025-07-01 05:44:07.294 - one
2025-07-01 05:44:07.315 + ore
2025-07-01 05:44:07.331 - two
2025-07-01 05:44:07.341 - three
2025-07-01 05:44:07.362 + tree
2025-07-01 05:44:07.373 + emu
2025-07-01 05:44:07.384 """
2025-07-01 05:44:07.391
2025-07-01 05:44:07.399 cruncher = SequenceMatcher(self.linejunk, a, b)
2025-07-01 05:44:07.407 for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
2025-07-01 05:44:07.414 if tag == 'replace':
2025-07-01 05:44:07.421 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:07.427 elif tag == 'delete':
2025-07-01 05:44:07.435 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:07.442 elif tag == 'insert':
2025-07-01 05:44:07.448 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:07.453 elif tag == 'equal':
2025-07-01 05:44:07.458 g = self._dump(' ', a, alo, ahi)
2025-07-01 05:44:07.468 else:
2025-07-01 05:44:07.476 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:07.484
2025-07-01 05:44:07.490 > yield from g
2025-07-01 05:44:07.496
2025-07-01 05:44:07.508 /usr/lib/python3.11/difflib.py:872:
2025-07-01 05:44:07.515 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:07.521
2025-07-01 05:44:07.526 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:07.531 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:07.535 alo = 3, ahi = 1101
2025-07-01 05:44:07.540 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:07.545 blo = 3, bhi = 1101
2025-07-01 05:44:07.549
2025-07-01 05:44:07.553 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:07.558 r"""
2025-07-01 05:44:07.562 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:07.566 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:07.571 synch point, and intraline difference marking is done on the
2025-07-01 05:44:07.575 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:07.580
2025-07-01 05:44:07.584 Example:
2025-07-01 05:44:07.588
2025-07-01 05:44:07.593 >>> d = Differ()
2025-07-01 05:44:07.598 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:07.602 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:07.607 >>> print(''.join(results), end="")
2025-07-01 05:44:07.611 - abcDefghiJkl
2025-07-01 05:44:07.621 + abcdefGhijkl
2025-07-01 05:44:07.632 """
2025-07-01 05:44:07.639
2025-07-01 05:44:07.650 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:07.658 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:07.665 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:07.672 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:07.678 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:07.683
2025-07-01 05:44:07.696 # search for the pair that matches best without being identical
2025-07-01 05:44:07.706 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:07.712 # on junk -- unless we have to)
2025-07-01 05:44:07.717 for j in range(blo, bhi):
2025-07-01 05:44:07.725 bj = b[j]
2025-07-01 05:44:07.730 cruncher.set_seq2(bj)
2025-07-01 05:44:07.740 for i in range(alo, ahi):
2025-07-01 05:44:07.749 ai = a[i]
2025-07-01 05:44:07.761 if ai == bj:
2025-07-01 05:44:07.772 if eqi is None:
2025-07-01 05:44:07.783 eqi, eqj = i, j
2025-07-01 05:44:07.795 continue
2025-07-01 05:44:07.804 cruncher.set_seq1(ai)
2025-07-01 05:44:07.812 # computing similarity is expensive, so use the quick
2025-07-01 05:44:07.819 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:07.826 # compares by a factor of 3.
2025-07-01 05:44:07.836 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:07.844 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:07.851 # of the computation is cached by cruncher
2025-07-01 05:44:07.857 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:07.868 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:07.881 cruncher.ratio() > best_ratio:
2025-07-01 05:44:07.891 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:07.903 if best_ratio < cutoff:
2025-07-01 05:44:07.916 # no non-identical "pretty close" pair
2025-07-01 05:44:07.925 if eqi is None:
2025-07-01 05:44:07.936 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:07.944 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:07.951 return
2025-07-01 05:44:07.960 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:07.969 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:07.976 else:
2025-07-01 05:44:07.983 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:07.990 eqi = None
2025-07-01 05:44:08.002
2025-07-01 05:44:08.017 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:08.025 # identical
2025-07-01 05:44:08.031
2025-07-01 05:44:08.036 # pump out diffs from before the synch point
2025-07-01 05:44:08.042 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:08.047
2025-07-01 05:44:08.053 # do intraline marking on the synch pair
2025-07-01 05:44:08.058 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:08.066 if eqi is None:
2025-07-01 05:44:08.074 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:08.083 atags = btags = ""
2025-07-01 05:44:08.094 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:08.104 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:08.116 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:08.126 if tag == 'replace':
2025-07-01 05:44:08.136 atags += '^' * la
2025-07-01 05:44:08.145 btags += '^' * lb
2025-07-01 05:44:08.155 elif tag == 'delete':
2025-07-01 05:44:08.166 atags += '-' * la
2025-07-01 05:44:08.177 elif tag == 'insert':
2025-07-01 05:44:08.187 btags += '+' * lb
2025-07-01 05:44:08.195 elif tag == 'equal':
2025-07-01 05:44:08.204 atags += ' ' * la
2025-07-01 05:44:08.210 btags += ' ' * lb
2025-07-01 05:44:08.221 else:
2025-07-01 05:44:08.233 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:08.243 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:08.251 else:
2025-07-01 05:44:08.259 # the synch pair is identical
2025-07-01 05:44:08.266 yield ' ' + aelt
2025-07-01 05:44:08.277
2025-07-01 05:44:08.286 # pump out diffs from after the synch point
2025-07-01 05:44:08.293 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:08.300
2025-07-01 05:44:08.306 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:08.313 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:08.318
2025-07-01 05:44:08.324 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:08.331 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:08.337 alo = 4, ahi = 1101
2025-07-01 05:44:08.344 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:08.350 blo = 4, bhi = 1101
2025-07-01 05:44:08.355
2025-07-01 05:44:08.363 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:08.373 g = []
2025-07-01 05:44:08.381 if alo < ahi:
2025-07-01 05:44:08.389 if blo < bhi:
2025-07-01 05:44:08.399 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:08.405 else:
2025-07-01 05:44:08.412 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:08.419 elif blo < bhi:
2025-07-01 05:44:08.426 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:08.432
2025-07-01 05:44:08.439 > yield from g
2025-07-01 05:44:08.445
2025-07-01 05:44:08.456 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:08.466 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:08.473
2025-07-01 05:44:08.479 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:08.486 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:08.491 alo = 4, ahi = 1101
2025-07-01 05:44:08.498 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:08.507 blo = 4, bhi = 1101
2025-07-01 05:44:08.519
2025-07-01 05:44:08.527 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:08.532 r"""
2025-07-01 05:44:08.544 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:08.554 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:08.562 synch point, and intraline difference marking is done on the
2025-07-01 05:44:08.570 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:08.576
2025-07-01 05:44:08.582 Example:
2025-07-01 05:44:08.587
2025-07-01 05:44:08.593 >>> d = Differ()
2025-07-01 05:44:08.601 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:08.608 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:08.614 >>> print(''.join(results), end="")
2025-07-01 05:44:08.620 - abcDefghiJkl
2025-07-01 05:44:08.631 + abcdefGhijkl
2025-07-01 05:44:08.641 """
2025-07-01 05:44:08.645
2025-07-01 05:44:08.650 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:08.657 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:08.665 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:08.671 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:08.679 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:08.686
2025-07-01 05:44:08.693 # search for the pair that matches best without being identical
2025-07-01 05:44:08.701 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:08.708 # on junk -- unless we have to)
2025-07-01 05:44:08.716 for j in range(blo, bhi):
2025-07-01 05:44:08.724 bj = b[j]
2025-07-01 05:44:08.733 cruncher.set_seq2(bj)
2025-07-01 05:44:08.738 for i in range(alo, ahi):
2025-07-01 05:44:08.747 ai = a[i]
2025-07-01 05:44:08.758 if ai == bj:
2025-07-01 05:44:08.768 if eqi is None:
2025-07-01 05:44:08.776 eqi, eqj = i, j
2025-07-01 05:44:08.782 continue
2025-07-01 05:44:08.787 cruncher.set_seq1(ai)
2025-07-01 05:44:08.791 # computing similarity is expensive, so use the quick
2025-07-01 05:44:08.796 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:08.800 # compares by a factor of 3.
2025-07-01 05:44:08.805 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:08.811 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:08.816 # of the computation is cached by cruncher
2025-07-01 05:44:08.821 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:08.826 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:08.831 cruncher.ratio() > best_ratio:
2025-07-01 05:44:08.838 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:08.844 if best_ratio < cutoff:
2025-07-01 05:44:08.850 # no non-identical "pretty close" pair
2025-07-01 05:44:08.855 if eqi is None:
2025-07-01 05:44:08.861 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:08.870 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:08.877 return
2025-07-01 05:44:08.883 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:08.894 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:08.905 else:
2025-07-01 05:44:08.916 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:08.924 eqi = None
2025-07-01 05:44:08.931
2025-07-01 05:44:08.938 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:08.948 # identical
2025-07-01 05:44:08.957
2025-07-01 05:44:08.965 # pump out diffs from before the synch point
2025-07-01 05:44:08.971 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:08.979
2025-07-01 05:44:08.988 # do intraline marking on the synch pair
2025-07-01 05:44:08.997 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:09.004 if eqi is None:
2025-07-01 05:44:09.011 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:09.017 atags = btags = ""
2025-07-01 05:44:09.023 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:09.030 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:09.039 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:09.045 if tag == 'replace':
2025-07-01 05:44:09.051 atags += '^' * la
2025-07-01 05:44:09.057 btags += '^' * lb
2025-07-01 05:44:09.062 elif tag == 'delete':
2025-07-01 05:44:09.067 atags += '-' * la
2025-07-01 05:44:09.073 elif tag == 'insert':
2025-07-01 05:44:09.078 btags += '+' * lb
2025-07-01 05:44:09.089 elif tag == 'equal':
2025-07-01 05:44:09.098 atags += ' ' * la
2025-07-01 05:44:09.106 btags += ' ' * lb
2025-07-01 05:44:09.114 else:
2025-07-01 05:44:09.123 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:09.134 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:09.143 else:
2025-07-01 05:44:09.151 # the synch pair is identical
2025-07-01 05:44:09.158 yield ' ' + aelt
2025-07-01 05:44:09.164
2025-07-01 05:44:09.170 # pump out diffs from after the synch point
2025-07-01 05:44:09.176 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:09.181
2025-07-01 05:44:09.187 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:09.194 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:09.204
2025-07-01 05:44:09.212 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:09.220 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:09.226 alo = 5, ahi = 1101
2025-07-01 05:44:09.233 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:09.244 blo = 5, bhi = 1101
2025-07-01 05:44:09.256
2025-07-01 05:44:09.267 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:09.273 g = []
2025-07-01 05:44:09.278 if alo < ahi:
2025-07-01 05:44:09.282 if blo < bhi:
2025-07-01 05:44:09.288 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:09.295 else:
2025-07-01 05:44:09.303 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:09.313 elif blo < bhi:
2025-07-01 05:44:09.322 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:09.328
2025-07-01 05:44:09.334 > yield from g
2025-07-01 05:44:09.339
2025-07-01 05:44:09.345 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:09.351 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:09.358
2025-07-01 05:44:09.369 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:09.378 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:09.384 alo = 5, ahi = 1101
2025-07-01 05:44:09.392 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:09.399 blo = 5, bhi = 1101
2025-07-01 05:44:09.406
2025-07-01 05:44:09.413 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:09.420 r"""
2025-07-01 05:44:09.426 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:09.433 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:09.439 synch point, and intraline difference marking is done on the
2025-07-01 05:44:09.447 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:09.453
2025-07-01 05:44:09.459 Example:
2025-07-01 05:44:09.467
2025-07-01 05:44:09.477 >>> d = Differ()
2025-07-01 05:44:09.485 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:09.491 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:09.497 >>> print(''.join(results), end="")
2025-07-01 05:44:09.502 - abcDefghiJkl
2025-07-01 05:44:09.514 + abcdefGhijkl
2025-07-01 05:44:09.525 """
2025-07-01 05:44:09.530
2025-07-01 05:44:09.536 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:09.543 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:09.548 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:09.554 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:09.564 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:09.574
2025-07-01 05:44:09.581 # search for the pair that matches best without being identical
2025-07-01 05:44:09.587 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:09.595 # on junk -- unless we have to)
2025-07-01 05:44:09.605 for j in range(blo, bhi):
2025-07-01 05:44:09.616 bj = b[j]
2025-07-01 05:44:09.627 cruncher.set_seq2(bj)
2025-07-01 05:44:09.634 for i in range(alo, ahi):
2025-07-01 05:44:09.646 ai = a[i]
2025-07-01 05:44:09.658 if ai == bj:
2025-07-01 05:44:09.670 if eqi is None:
2025-07-01 05:44:09.683 eqi, eqj = i, j
2025-07-01 05:44:09.692 continue
2025-07-01 05:44:09.701 cruncher.set_seq1(ai)
2025-07-01 05:44:09.712 # computing similarity is expensive, so use the quick
2025-07-01 05:44:09.721 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:09.728 # compares by a factor of 3.
2025-07-01 05:44:09.734 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:09.747 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:09.759 # of the computation is cached by cruncher
2025-07-01 05:44:09.769 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:09.777 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:09.784 cruncher.ratio() > best_ratio:
2025-07-01 05:44:09.792 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:09.798 if best_ratio < cutoff:
2025-07-01 05:44:09.804 # no non-identical "pretty close" pair
2025-07-01 05:44:09.810 if eqi is None:
2025-07-01 05:44:09.816 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:09.822 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:09.835 return
2025-07-01 05:44:09.846 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:09.856 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:09.862 else:
2025-07-01 05:44:09.868 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:09.874 eqi = None
2025-07-01 05:44:09.881
2025-07-01 05:44:09.885 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:09.890 # identical
2025-07-01 05:44:09.894
2025-07-01 05:44:09.902 # pump out diffs from before the synch point
2025-07-01 05:44:09.908 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:09.914
2025-07-01 05:44:09.921 # do intraline marking on the synch pair
2025-07-01 05:44:09.929 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:09.935 if eqi is None:
2025-07-01 05:44:09.943 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:09.956 atags = btags = ""
2025-07-01 05:44:09.965 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:09.974 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:09.980 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:09.987 if tag == 'replace':
2025-07-01 05:44:09.994 atags += '^' * la
2025-07-01 05:44:10.003 btags += '^' * lb
2025-07-01 05:44:10.012 elif tag == 'delete':
2025-07-01 05:44:10.025 atags += '-' * la
2025-07-01 05:44:10.037 elif tag == 'insert':
2025-07-01 05:44:10.050 btags += '+' * lb
2025-07-01 05:44:10.061 elif tag == 'equal':
2025-07-01 05:44:10.074 atags += ' ' * la
2025-07-01 05:44:10.084 btags += ' ' * lb
2025-07-01 05:44:10.091 else:
2025-07-01 05:44:10.099 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:10.111 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:10.122 else:
2025-07-01 05:44:10.131 # the synch pair is identical
2025-07-01 05:44:10.141 yield ' ' + aelt
2025-07-01 05:44:10.148
2025-07-01 05:44:10.155 # pump out diffs from after the synch point
2025-07-01 05:44:10.162 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:10.167
2025-07-01 05:44:10.175 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:10.186 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:10.197
2025-07-01 05:44:10.208 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:10.220 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:10.230 alo = 6, ahi = 1101
2025-07-01 05:44:10.240 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:10.247 blo = 6, bhi = 1101
2025-07-01 05:44:10.254
2025-07-01 05:44:10.260 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:10.267 g = []
2025-07-01 05:44:10.277 if alo < ahi:
2025-07-01 05:44:10.288 if blo < bhi:
2025-07-01 05:44:10.298 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:10.309 else:
2025-07-01 05:44:10.319 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:10.327 elif blo < bhi:
2025-07-01 05:44:10.334 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:10.344
2025-07-01 05:44:10.356 > yield from g
2025-07-01 05:44:10.364
2025-07-01 05:44:10.371 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:10.378 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:10.384
2025-07-01 05:44:10.391 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:10.399 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:10.409 alo = 6, ahi = 1101
2025-07-01 05:44:10.418 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:10.426 blo = 6, bhi = 1101
2025-07-01 05:44:10.435
2025-07-01 05:44:10.445 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:10.454 r"""
2025-07-01 05:44:10.465 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:10.474 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:10.483 synch point, and intraline difference marking is done on the
2025-07-01 05:44:10.493 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:10.501
2025-07-01 05:44:10.507 Example:
2025-07-01 05:44:10.513
2025-07-01 05:44:10.518 >>> d = Differ()
2025-07-01 05:44:10.528 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:10.539 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:10.546 >>> print(''.join(results), end="")
2025-07-01 05:44:10.558 - abcDefghiJkl
2025-07-01 05:44:10.580 + abcdefGhijkl
2025-07-01 05:44:10.600 """
2025-07-01 05:44:10.611
2025-07-01 05:44:10.620 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:10.627 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:10.634 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:10.640 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:10.646 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:10.656
2025-07-01 05:44:10.667 # search for the pair that matches best without being identical
2025-07-01 05:44:10.674 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:10.681 # on junk -- unless we have to)
2025-07-01 05:44:10.687 for j in range(blo, bhi):
2025-07-01 05:44:10.692 bj = b[j]
2025-07-01 05:44:10.698 cruncher.set_seq2(bj)
2025-07-01 05:44:10.709 for i in range(alo, ahi):
2025-07-01 05:44:10.720 ai = a[i]
2025-07-01 05:44:10.730 if ai == bj:
2025-07-01 05:44:10.740 if eqi is None:
2025-07-01 05:44:10.750 eqi, eqj = i, j
2025-07-01 05:44:10.762 continue
2025-07-01 05:44:10.772 cruncher.set_seq1(ai)
2025-07-01 05:44:10.782 # computing similarity is expensive, so use the quick
2025-07-01 05:44:10.790 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:10.796 # compares by a factor of 3.
2025-07-01 05:44:10.802 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:10.814 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:10.824 # of the computation is cached by cruncher
2025-07-01 05:44:10.833 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:10.840 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:10.847 cruncher.ratio() > best_ratio:
2025-07-01 05:44:10.859 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:10.872 if best_ratio < cutoff:
2025-07-01 05:44:10.881 # no non-identical "pretty close" pair
2025-07-01 05:44:10.887 if eqi is None:
2025-07-01 05:44:10.894 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:10.900 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:10.906 return
2025-07-01 05:44:10.912 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:10.919 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:10.929 else:
2025-07-01 05:44:10.942 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:10.951 eqi = None
2025-07-01 05:44:10.958
2025-07-01 05:44:10.966 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:10.973 # identical
2025-07-01 05:44:10.980
2025-07-01 05:44:10.987 # pump out diffs from before the synch point
2025-07-01 05:44:10.994 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:11.000
2025-07-01 05:44:11.006 # do intraline marking on the synch pair
2025-07-01 05:44:11.012 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:11.019 if eqi is None:
2025-07-01 05:44:11.027 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:11.035 atags = btags = ""
2025-07-01 05:44:11.046 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:11.058 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:11.067 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:11.074 if tag == 'replace':
2025-07-01 05:44:11.080 atags += '^' * la
2025-07-01 05:44:11.087 btags += '^' * lb
2025-07-01 05:44:11.100 elif tag == 'delete':
2025-07-01 05:44:11.109 atags += '-' * la
2025-07-01 05:44:11.116 elif tag == 'insert':
2025-07-01 05:44:11.123 btags += '+' * lb
2025-07-01 05:44:11.131 elif tag == 'equal':
2025-07-01 05:44:11.139 atags += ' ' * la
2025-07-01 05:44:11.146 btags += ' ' * lb
2025-07-01 05:44:11.152 else:
2025-07-01 05:44:11.158 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:11.167 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:11.174 else:
2025-07-01 05:44:11.183 # the synch pair is identical
2025-07-01 05:44:11.195 yield ' ' + aelt
2025-07-01 05:44:11.204
2025-07-01 05:44:11.216 # pump out diffs from after the synch point
2025-07-01 05:44:11.227 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:11.237
2025-07-01 05:44:11.246 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:11.256 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:11.266
2025-07-01 05:44:11.274 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:11.286 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:11.295 alo = 7, ahi = 1101
2025-07-01 05:44:11.303 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:11.311 blo = 7, bhi = 1101
2025-07-01 05:44:11.322
2025-07-01 05:44:11.331 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:11.338 g = []
2025-07-01 05:44:11.350 if alo < ahi:
2025-07-01 05:44:11.362 if blo < bhi:
2025-07-01 05:44:11.372 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:11.381 else:
2025-07-01 05:44:11.392 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:11.403 elif blo < bhi:
2025-07-01 05:44:11.411 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:11.423
2025-07-01 05:44:11.435 > yield from g
2025-07-01 05:44:11.447
2025-07-01 05:44:11.456 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:11.471 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:11.480
2025-07-01 05:44:11.488 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:11.502 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:11.510 alo = 7, ahi = 1101
2025-07-01 05:44:11.519 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:11.526 blo = 7, bhi = 1101
2025-07-01 05:44:11.533
2025-07-01 05:44:11.542 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:11.548 r"""
2025-07-01 05:44:11.554 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:11.558 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:11.566 synch point, and intraline difference marking is done on the
2025-07-01 05:44:11.573 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:11.580
2025-07-01 05:44:11.588 Example:
2025-07-01 05:44:11.598
2025-07-01 05:44:11.605 >>> d = Differ()
2025-07-01 05:44:11.612 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:11.618 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:11.628 >>> print(''.join(results), end="")
2025-07-01 05:44:11.637 - abcDefghiJkl
2025-07-01 05:44:11.651 + abcdefGhijkl
2025-07-01 05:44:11.672 """
2025-07-01 05:44:11.680
2025-07-01 05:44:11.686 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:11.691 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:11.696 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:11.700 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:11.705 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:11.711
2025-07-01 05:44:11.718 # search for the pair that matches best without being identical
2025-07-01 05:44:11.727 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:11.738 # on junk -- unless we have to)
2025-07-01 05:44:11.751 for j in range(blo, bhi):
2025-07-01 05:44:11.761 bj = b[j]
2025-07-01 05:44:11.768 cruncher.set_seq2(bj)
2025-07-01 05:44:11.775 for i in range(alo, ahi):
2025-07-01 05:44:11.782 ai = a[i]
2025-07-01 05:44:11.791 if ai == bj:
2025-07-01 05:44:11.800 if eqi is None:
2025-07-01 05:44:11.807 eqi, eqj = i, j
2025-07-01 05:44:11.813 continue
2025-07-01 05:44:11.819 cruncher.set_seq1(ai)
2025-07-01 05:44:11.825 # computing similarity is expensive, so use the quick
2025-07-01 05:44:11.831 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:11.840 # compares by a factor of 3.
2025-07-01 05:44:11.847 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:11.854 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:11.863 # of the computation is cached by cruncher
2025-07-01 05:44:11.874 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:11.881 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:11.887 cruncher.ratio() > best_ratio:
2025-07-01 05:44:11.893 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:11.898 if best_ratio < cutoff:
2025-07-01 05:44:11.903 # no non-identical "pretty close" pair
2025-07-01 05:44:11.907 if eqi is None:
2025-07-01 05:44:11.912 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:11.917 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:11.923 return
2025-07-01 05:44:11.928 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:11.934 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:11.946 else:
2025-07-01 05:44:11.956 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:11.964 eqi = None
2025-07-01 05:44:11.969
2025-07-01 05:44:11.975 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:11.987 # identical
2025-07-01 05:44:11.994
2025-07-01 05:44:12.005 # pump out diffs from before the synch point
2025-07-01 05:44:12.012 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:12.022
2025-07-01 05:44:12.030 # do intraline marking on the synch pair
2025-07-01 05:44:12.037 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:12.042 if eqi is None:
2025-07-01 05:44:12.049 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:12.055 atags = btags = ""
2025-07-01 05:44:12.062 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:12.069 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:12.076 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:12.083 if tag == 'replace':
2025-07-01 05:44:12.090 atags += '^' * la
2025-07-01 05:44:12.097 btags += '^' * lb
2025-07-01 05:44:12.109 elif tag == 'delete':
2025-07-01 05:44:12.119 atags += '-' * la
2025-07-01 05:44:12.126 elif tag == 'insert':
2025-07-01 05:44:12.134 btags += '+' * lb
2025-07-01 05:44:12.145 elif tag == 'equal':
2025-07-01 05:44:12.153 atags += ' ' * la
2025-07-01 05:44:12.164 btags += ' ' * lb
2025-07-01 05:44:12.174 else:
2025-07-01 05:44:12.183 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:12.190 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:12.201 else:
2025-07-01 05:44:12.213 # the synch pair is identical
2025-07-01 05:44:12.222 yield ' ' + aelt
2025-07-01 05:44:12.235
2025-07-01 05:44:12.244 # pump out diffs from after the synch point
2025-07-01 05:44:12.253 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:12.259
2025-07-01 05:44:12.269 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:12.280 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:12.290
2025-07-01 05:44:12.299 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:12.309 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:12.315 alo = 8, ahi = 1101
2025-07-01 05:44:12.323 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:12.329 blo = 8, bhi = 1101
2025-07-01 05:44:12.335
2025-07-01 05:44:12.341 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:12.347 g = []
2025-07-01 05:44:12.353 if alo < ahi:
2025-07-01 05:44:12.360 if blo < bhi:
2025-07-01 05:44:12.367 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:12.374 else:
2025-07-01 05:44:12.385 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:12.396 elif blo < bhi:
2025-07-01 05:44:12.403 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:12.409
2025-07-01 05:44:12.414 > yield from g
2025-07-01 05:44:12.419
2025-07-01 05:44:12.424 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:12.430 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:12.436
2025-07-01 05:44:12.443 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:12.451 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:12.461 alo = 8, ahi = 1101
2025-07-01 05:44:12.470 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:12.475 blo = 8, bhi = 1101
2025-07-01 05:44:12.481
2025-07-01 05:44:12.489 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:12.496 r"""
2025-07-01 05:44:12.506 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:12.516 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:12.524 synch point, and intraline difference marking is done on the
2025-07-01 05:44:12.529 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:12.535
2025-07-01 05:44:12.541 Example:
2025-07-01 05:44:12.548
2025-07-01 05:44:12.555 >>> d = Differ()
2025-07-01 05:44:12.563 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:12.571 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:12.578 >>> print(''.join(results), end="")
2025-07-01 05:44:12.583 - abcDefghiJkl
2025-07-01 05:44:12.596 + abcdefGhijkl
2025-07-01 05:44:12.615 """
2025-07-01 05:44:12.626
2025-07-01 05:44:12.635 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:12.642 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:12.649 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:12.655 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:12.662 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:12.666
2025-07-01 05:44:12.671 # search for the pair that matches best without being identical
2025-07-01 05:44:12.677 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:12.684 # on junk -- unless we have to)
2025-07-01 05:44:12.691 for j in range(blo, bhi):
2025-07-01 05:44:12.707 bj = b[j]
2025-07-01 05:44:12.717 cruncher.set_seq2(bj)
2025-07-01 05:44:12.725 for i in range(alo, ahi):
2025-07-01 05:44:12.731 ai = a[i]
2025-07-01 05:44:12.738 if ai == bj:
2025-07-01 05:44:12.743 if eqi is None:
2025-07-01 05:44:12.748 eqi, eqj = i, j
2025-07-01 05:44:12.753 continue
2025-07-01 05:44:12.757 cruncher.set_seq1(ai)
2025-07-01 05:44:12.764 # computing similarity is expensive, so use the quick
2025-07-01 05:44:12.776 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:12.784 # compares by a factor of 3.
2025-07-01 05:44:12.793 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:12.801 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:12.808 # of the computation is cached by cruncher
2025-07-01 05:44:12.822 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:12.831 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:12.837 cruncher.ratio() > best_ratio:
2025-07-01 05:44:12.843 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:12.848 if best_ratio < cutoff:
2025-07-01 05:44:12.853 # no non-identical "pretty close" pair
2025-07-01 05:44:12.859 if eqi is None:
2025-07-01 05:44:12.864 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:12.869 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:12.874 return
2025-07-01 05:44:12.878 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:12.883 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:12.888 else:
2025-07-01 05:44:12.894 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:12.905 eqi = None
2025-07-01 05:44:12.914
2025-07-01 05:44:12.924 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:12.936 # identical
2025-07-01 05:44:12.944
2025-07-01 05:44:12.950 # pump out diffs from before the synch point
2025-07-01 05:44:12.956 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:12.962
2025-07-01 05:44:12.968 # do intraline marking on the synch pair
2025-07-01 05:44:12.975 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:12.987 if eqi is None:
2025-07-01 05:44:12.999 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:13.009 atags = btags = ""
2025-07-01 05:44:13.014 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:13.019 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:13.025 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:13.030 if tag == 'replace':
2025-07-01 05:44:13.037 atags += '^' * la
2025-07-01 05:44:13.044 btags += '^' * lb
2025-07-01 05:44:13.050 elif tag == 'delete':
2025-07-01 05:44:13.056 atags += '-' * la
2025-07-01 05:44:13.061 elif tag == 'insert':
2025-07-01 05:44:13.067 btags += '+' * lb
2025-07-01 05:44:13.074 elif tag == 'equal':
2025-07-01 05:44:13.079 atags += ' ' * la
2025-07-01 05:44:13.084 btags += ' ' * lb
2025-07-01 05:44:13.090 else:
2025-07-01 05:44:13.095 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:13.100 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:13.105 else:
2025-07-01 05:44:13.114 # the synch pair is identical
2025-07-01 05:44:13.123 yield ' ' + aelt
2025-07-01 05:44:13.132
2025-07-01 05:44:13.139 # pump out diffs from after the synch point
2025-07-01 05:44:13.145 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:13.152
2025-07-01 05:44:13.158 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:13.167 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:13.178
2025-07-01 05:44:13.188 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:13.199 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:13.210 alo = 9, ahi = 1101
2025-07-01 05:44:13.219 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:13.227 blo = 9, bhi = 1101
2025-07-01 05:44:13.233
2025-07-01 05:44:13.240 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:13.246 g = []
2025-07-01 05:44:13.252 if alo < ahi:
2025-07-01 05:44:13.258 if blo < bhi:
2025-07-01 05:44:13.270 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:13.277 else:
2025-07-01 05:44:13.284 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:13.290 elif blo < bhi:
2025-07-01 05:44:13.303 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:13.313
2025-07-01 05:44:13.322 > yield from g
2025-07-01 05:44:13.329
2025-07-01 05:44:13.341 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:13.349 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:13.360
2025-07-01 05:44:13.372 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:13.383 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:13.394 alo = 9, ahi = 1101
2025-07-01 05:44:13.406 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:13.417 blo = 9, bhi = 1101
2025-07-01 05:44:13.428
2025-07-01 05:44:13.439 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:13.451 r"""
2025-07-01 05:44:13.459 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:13.468 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:13.475 synch point, and intraline difference marking is done on the
2025-07-01 05:44:13.482 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:13.491
2025-07-01 05:44:13.503 Example:
2025-07-01 05:44:13.512
2025-07-01 05:44:13.518 >>> d = Differ()
2025-07-01 05:44:13.527 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:13.537 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:13.545 >>> print(''.join(results), end="")
2025-07-01 05:44:13.553 - abcDefghiJkl
2025-07-01 05:44:13.569 + abcdefGhijkl
2025-07-01 05:44:13.583 """
2025-07-01 05:44:13.596
2025-07-01 05:44:13.606 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:13.614 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:13.621 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:13.628 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:13.635 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:13.643
2025-07-01 05:44:13.654 # search for the pair that matches best without being identical
2025-07-01 05:44:13.662 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:13.671 # on junk -- unless we have to)
2025-07-01 05:44:13.681 for j in range(blo, bhi):
2025-07-01 05:44:13.690 bj = b[j]
2025-07-01 05:44:13.699 cruncher.set_seq2(bj)
2025-07-01 05:44:13.706 for i in range(alo, ahi):
2025-07-01 05:44:13.711 ai = a[i]
2025-07-01 05:44:13.722 if ai == bj:
2025-07-01 05:44:13.732 if eqi is None:
2025-07-01 05:44:13.740 eqi, eqj = i, j
2025-07-01 05:44:13.747 continue
2025-07-01 05:44:13.755 cruncher.set_seq1(ai)
2025-07-01 05:44:13.767 # computing similarity is expensive, so use the quick
2025-07-01 05:44:13.774 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:13.781 # compares by a factor of 3.
2025-07-01 05:44:13.786 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:13.792 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:13.797 # of the computation is cached by cruncher
2025-07-01 05:44:13.803 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:13.809 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:13.818 cruncher.ratio() > best_ratio:
2025-07-01 05:44:13.829 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:13.839 if best_ratio < cutoff:
2025-07-01 05:44:13.849 # no non-identical "pretty close" pair
2025-07-01 05:44:13.858 if eqi is None:
2025-07-01 05:44:13.868 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:13.878 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:13.886 return
2025-07-01 05:44:13.895 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:13.903 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:13.910 else:
2025-07-01 05:44:13.918 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:13.929 eqi = None
2025-07-01 05:44:13.937
2025-07-01 05:44:13.944 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:13.950 # identical
2025-07-01 05:44:13.962
2025-07-01 05:44:13.972 # pump out diffs from before the synch point
2025-07-01 05:44:13.980 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:13.986
2025-07-01 05:44:13.999 # do intraline marking on the synch pair
2025-07-01 05:44:14.009 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:14.018 if eqi is None:
2025-07-01 05:44:14.026 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:14.034 atags = btags = ""
2025-07-01 05:44:14.040 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:14.046 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:14.051 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:14.057 if tag == 'replace':
2025-07-01 05:44:14.064 atags += '^' * la
2025-07-01 05:44:14.070 btags += '^' * lb
2025-07-01 05:44:14.082 elif tag == 'delete':
2025-07-01 05:44:14.092 atags += '-' * la
2025-07-01 05:44:14.098 elif tag == 'insert':
2025-07-01 05:44:14.105 btags += '+' * lb
2025-07-01 05:44:14.112 elif tag == 'equal':
2025-07-01 05:44:14.119 atags += ' ' * la
2025-07-01 05:44:14.125 btags += ' ' * lb
2025-07-01 05:44:14.131 else:
2025-07-01 05:44:14.136 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:14.143 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:14.149 else:
2025-07-01 05:44:14.155 # the synch pair is identical
2025-07-01 05:44:14.161 yield ' ' + aelt
2025-07-01 05:44:14.167
2025-07-01 05:44:14.179 # pump out diffs from after the synch point
2025-07-01 05:44:14.191 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:14.203
2025-07-01 05:44:14.212 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:14.226 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:14.237
2025-07-01 05:44:14.250 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:14.259 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:14.266 alo = 10, ahi = 1101
2025-07-01 05:44:14.272 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:14.276 blo = 10, bhi = 1101
2025-07-01 05:44:14.281
2025-07-01 05:44:14.286 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:14.290 g = []
2025-07-01 05:44:14.295 if alo < ahi:
2025-07-01 05:44:14.299 if blo < bhi:
2025-07-01 05:44:14.304 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:14.308 else:
2025-07-01 05:44:14.313 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:14.317 elif blo < bhi:
2025-07-01 05:44:14.322 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:14.326
2025-07-01 05:44:14.331 > yield from g
2025-07-01 05:44:14.335
2025-07-01 05:44:14.340 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:14.344 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:14.349
2025-07-01 05:44:14.353 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:14.358 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:14.363 alo = 10, ahi = 1101
2025-07-01 05:44:14.368 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:14.374 blo = 10, bhi = 1101
2025-07-01 05:44:14.380
2025-07-01 05:44:14.388 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:14.394 r"""
2025-07-01 05:44:14.400 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:14.405 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:14.410 synch point, and intraline difference marking is done on the
2025-07-01 05:44:14.414 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:14.419
2025-07-01 05:44:14.423 Example:
2025-07-01 05:44:14.428
2025-07-01 05:44:14.433 >>> d = Differ()
2025-07-01 05:44:14.439 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:14.444 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:14.449 >>> print(''.join(results), end="")
2025-07-01 05:44:14.454 - abcDefghiJkl
2025-07-01 05:44:14.465 + abcdefGhijkl
2025-07-01 05:44:14.476 """
2025-07-01 05:44:14.488
2025-07-01 05:44:14.498 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:14.506 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:14.514 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:14.526 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:14.536 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:14.543
2025-07-01 05:44:14.550 # search for the pair that matches best without being identical
2025-07-01 05:44:14.562 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:14.571 # on junk -- unless we have to)
2025-07-01 05:44:14.578 for j in range(blo, bhi):
2025-07-01 05:44:14.585 bj = b[j]
2025-07-01 05:44:14.591 cruncher.set_seq2(bj)
2025-07-01 05:44:14.597 for i in range(alo, ahi):
2025-07-01 05:44:14.604 ai = a[i]
2025-07-01 05:44:14.610 if ai == bj:
2025-07-01 05:44:14.623 if eqi is None:
2025-07-01 05:44:14.633 eqi, eqj = i, j
2025-07-01 05:44:14.640 continue
2025-07-01 05:44:14.647 cruncher.set_seq1(ai)
2025-07-01 05:44:14.652 # computing similarity is expensive, so use the quick
2025-07-01 05:44:14.659 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:14.666 # compares by a factor of 3.
2025-07-01 05:44:14.678 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:14.687 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:14.695 # of the computation is cached by cruncher
2025-07-01 05:44:14.705 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:14.715 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:14.728 cruncher.ratio() > best_ratio:
2025-07-01 05:44:14.741 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:14.753 if best_ratio < cutoff:
2025-07-01 05:44:14.761 # no non-identical "pretty close" pair
2025-07-01 05:44:14.768 if eqi is None:
2025-07-01 05:44:14.774 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:14.781 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:14.786 return
2025-07-01 05:44:14.791 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:14.797 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:14.802 else:
2025-07-01 05:44:14.812 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:14.822 eqi = None
2025-07-01 05:44:14.829
2025-07-01 05:44:14.837 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:14.843 # identical
2025-07-01 05:44:14.858
2025-07-01 05:44:14.870 # pump out diffs from before the synch point
2025-07-01 05:44:14.879 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:14.886
2025-07-01 05:44:14.897 # do intraline marking on the synch pair
2025-07-01 05:44:14.906 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:14.915 if eqi is None:
2025-07-01 05:44:14.924 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:14.933 atags = btags = ""
2025-07-01 05:44:14.944 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:14.955 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:14.961 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:14.968 if tag == 'replace':
2025-07-01 05:44:14.978 atags += '^' * la
2025-07-01 05:44:14.990 btags += '^' * lb
2025-07-01 05:44:15.001 elif tag == 'delete':
2025-07-01 05:44:15.013 atags += '-' * la
2025-07-01 05:44:15.024 elif tag == 'insert':
2025-07-01 05:44:15.033 btags += '+' * lb
2025-07-01 05:44:15.041 elif tag == 'equal':
2025-07-01 05:44:15.046 atags += ' ' * la
2025-07-01 05:44:15.055 btags += ' ' * lb
2025-07-01 05:44:15.060 else:
2025-07-01 05:44:15.066 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:15.074 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:15.080 else:
2025-07-01 05:44:15.086 # the synch pair is identical
2025-07-01 05:44:15.091 yield ' ' + aelt
2025-07-01 05:44:15.098
2025-07-01 05:44:15.106 # pump out diffs from after the synch point
2025-07-01 05:44:15.114 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:15.124
2025-07-01 05:44:15.134 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:15.143 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:15.153
2025-07-01 05:44:15.165 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:15.174 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:15.184 alo = 11, ahi = 1101
2025-07-01 05:44:15.197 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:15.204 blo = 11, bhi = 1101
2025-07-01 05:44:15.211
2025-07-01 05:44:15.219 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:15.225 g = []
2025-07-01 05:44:15.231 if alo < ahi:
2025-07-01 05:44:15.238 if blo < bhi:
2025-07-01 05:44:15.246 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:15.252 else:
2025-07-01 05:44:15.258 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:15.263 elif blo < bhi:
2025-07-01 05:44:15.270 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:15.277
2025-07-01 05:44:15.283 > yield from g
2025-07-01 05:44:15.291
2025-07-01 05:44:15.301 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:15.309 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:15.320
2025-07-01 05:44:15.327 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:15.337 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:15.350 alo = 11, ahi = 1101
2025-07-01 05:44:15.362 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:15.371 blo = 11, bhi = 1101
2025-07-01 05:44:15.381
2025-07-01 05:44:15.389 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:15.395 r"""
2025-07-01 05:44:15.402 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:15.412 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:15.422 synch point, and intraline difference marking is done on the
2025-07-01 05:44:15.428 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:15.434
2025-07-01 05:44:15.440 Example:
2025-07-01 05:44:15.446
2025-07-01 05:44:15.455 >>> d = Differ()
2025-07-01 05:44:15.467 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:15.474 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:15.480 >>> print(''.join(results), end="")
2025-07-01 05:44:15.485 - abcDefghiJkl
2025-07-01 05:44:15.497 + abcdefGhijkl
2025-07-01 05:44:15.506 """
2025-07-01 05:44:15.512
2025-07-01 05:44:15.518 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:15.524 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:15.536 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:15.549 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:15.561 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:15.569
2025-07-01 05:44:15.577 # search for the pair that matches best without being identical
2025-07-01 05:44:15.583 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:15.589 # on junk -- unless we have to)
2025-07-01 05:44:15.603 for j in range(blo, bhi):
2025-07-01 05:44:15.613 bj = b[j]
2025-07-01 05:44:15.619 cruncher.set_seq2(bj)
2025-07-01 05:44:15.628 for i in range(alo, ahi):
2025-07-01 05:44:15.634 ai = a[i]
2025-07-01 05:44:15.640 if ai == bj:
2025-07-01 05:44:15.645 if eqi is None:
2025-07-01 05:44:15.650 eqi, eqj = i, j
2025-07-01 05:44:15.656 continue
2025-07-01 05:44:15.661 cruncher.set_seq1(ai)
2025-07-01 05:44:15.667 # computing similarity is expensive, so use the quick
2025-07-01 05:44:15.672 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:15.677 # compares by a factor of 3.
2025-07-01 05:44:15.683 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:15.689 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:15.694 # of the computation is cached by cruncher
2025-07-01 05:44:15.701 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:15.709 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:15.720 cruncher.ratio() > best_ratio:
2025-07-01 05:44:15.729 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:15.740 if best_ratio < cutoff:
2025-07-01 05:44:15.749 # no non-identical "pretty close" pair
2025-07-01 05:44:15.757 if eqi is None:
2025-07-01 05:44:15.769 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:15.778 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:15.785 return
2025-07-01 05:44:15.796 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:15.806 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:15.813 else:
2025-07-01 05:44:15.819 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:15.825 eqi = None
2025-07-01 05:44:15.832
2025-07-01 05:44:15.839 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:15.846 # identical
2025-07-01 05:44:15.856
2025-07-01 05:44:15.866 # pump out diffs from before the synch point
2025-07-01 05:44:15.873 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:15.879
2025-07-01 05:44:15.885 # do intraline marking on the synch pair
2025-07-01 05:44:15.891 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:15.898 if eqi is None:
2025-07-01 05:44:15.905 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:15.912 atags = btags = ""
2025-07-01 05:44:15.919 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:15.926 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:15.932 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:15.939 if tag == 'replace':
2025-07-01 05:44:15.947 atags += '^' * la
2025-07-01 05:44:15.959 btags += '^' * lb
2025-07-01 05:44:15.970 elif tag == 'delete':
2025-07-01 05:44:15.978 atags += '-' * la
2025-07-01 05:44:15.984 elif tag == 'insert':
2025-07-01 05:44:15.990 btags += '+' * lb
2025-07-01 05:44:15.996 elif tag == 'equal':
2025-07-01 05:44:16.003 atags += ' ' * la
2025-07-01 05:44:16.011 btags += ' ' * lb
2025-07-01 05:44:16.022 else:
2025-07-01 05:44:16.031 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:16.038 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:16.045 else:
2025-07-01 05:44:16.051 # the synch pair is identical
2025-07-01 05:44:16.059 yield ' ' + aelt
2025-07-01 05:44:16.068
2025-07-01 05:44:16.073 # pump out diffs from after the synch point
2025-07-01 05:44:16.078 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:16.083
2025-07-01 05:44:16.088 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:16.095 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:16.102
2025-07-01 05:44:16.108 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:16.115 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:16.121 alo = 12, ahi = 1101
2025-07-01 05:44:16.129 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:16.136 blo = 12, bhi = 1101
2025-07-01 05:44:16.144
2025-07-01 05:44:16.155 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:16.165 g = []
2025-07-01 05:44:16.172 if alo < ahi:
2025-07-01 05:44:16.178 if blo < bhi:
2025-07-01 05:44:16.183 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:16.189 else:
2025-07-01 05:44:16.196 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:16.202 elif blo < bhi:
2025-07-01 05:44:16.211 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:16.224
2025-07-01 05:44:16.234 > yield from g
2025-07-01 05:44:16.242
2025-07-01 05:44:16.248 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:16.255 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:16.262
2025-07-01 05:44:16.272 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:16.282 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:16.288 alo = 12, ahi = 1101
2025-07-01 05:44:16.296 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:16.302 blo = 12, bhi = 1101
2025-07-01 05:44:16.309
2025-07-01 05:44:16.316 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:16.323 r"""
2025-07-01 05:44:16.330 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:16.337 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:16.344 synch point, and intraline difference marking is done on the
2025-07-01 05:44:16.351 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:16.359
2025-07-01 05:44:16.371 Example:
2025-07-01 05:44:16.379
2025-07-01 05:44:16.386 >>> d = Differ()
2025-07-01 05:44:16.393 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:16.399 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:16.404 >>> print(''.join(results), end="")
2025-07-01 05:44:16.409 - abcDefghiJkl
2025-07-01 05:44:16.418 + abcdefGhijkl
2025-07-01 05:44:16.428 """
2025-07-01 05:44:16.433
2025-07-01 05:44:16.437 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:16.442 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:16.447 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:16.454 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:16.461 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:16.467
2025-07-01 05:44:16.475 # search for the pair that matches best without being identical
2025-07-01 05:44:16.482 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:16.492 # on junk -- unless we have to)
2025-07-01 05:44:16.501 for j in range(blo, bhi):
2025-07-01 05:44:16.508 bj = b[j]
2025-07-01 05:44:16.514 cruncher.set_seq2(bj)
2025-07-01 05:44:16.519 for i in range(alo, ahi):
2025-07-01 05:44:16.524 ai = a[i]
2025-07-01 05:44:16.529 if ai == bj:
2025-07-01 05:44:16.534 if eqi is None:
2025-07-01 05:44:16.538 eqi, eqj = i, j
2025-07-01 05:44:16.543 continue
2025-07-01 05:44:16.548 cruncher.set_seq1(ai)
2025-07-01 05:44:16.554 # computing similarity is expensive, so use the quick
2025-07-01 05:44:16.560 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:16.565 # compares by a factor of 3.
2025-07-01 05:44:16.571 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:16.579 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:16.587 # of the computation is cached by cruncher
2025-07-01 05:44:16.595 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:16.602 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:16.608 cruncher.ratio() > best_ratio:
2025-07-01 05:44:16.614 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:16.619 if best_ratio < cutoff:
2025-07-01 05:44:16.625 # no non-identical "pretty close" pair
2025-07-01 05:44:16.631 if eqi is None:
2025-07-01 05:44:16.637 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:16.643 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:16.648 return
2025-07-01 05:44:16.654 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:16.661 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:16.665 else:
2025-07-01 05:44:16.670 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:16.675 eqi = None
2025-07-01 05:44:16.680
2025-07-01 05:44:16.685 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:16.690 # identical
2025-07-01 05:44:16.695
2025-07-01 05:44:16.701 # pump out diffs from before the synch point
2025-07-01 05:44:16.706 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:16.712
2025-07-01 05:44:16.718 # do intraline marking on the synch pair
2025-07-01 05:44:16.730 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:16.738 if eqi is None:
2025-07-01 05:44:16.745 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:16.750 atags = btags = ""
2025-07-01 05:44:16.755 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:16.760 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:16.765 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:16.770 if tag == 'replace':
2025-07-01 05:44:16.775 atags += '^' * la
2025-07-01 05:44:16.781 btags += '^' * lb
2025-07-01 05:44:16.786 elif tag == 'delete':
2025-07-01 05:44:16.792 atags += '-' * la
2025-07-01 05:44:16.798 elif tag == 'insert':
2025-07-01 05:44:16.808 btags += '+' * lb
2025-07-01 05:44:16.819 elif tag == 'equal':
2025-07-01 05:44:16.829 atags += ' ' * la
2025-07-01 05:44:16.836 btags += ' ' * lb
2025-07-01 05:44:16.843 else:
2025-07-01 05:44:16.849 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:16.854 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:16.859 else:
2025-07-01 05:44:16.864 # the synch pair is identical
2025-07-01 05:44:16.870 yield ' ' + aelt
2025-07-01 05:44:16.876
2025-07-01 05:44:16.888 # pump out diffs from after the synch point
2025-07-01 05:44:16.900 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:16.908
2025-07-01 05:44:16.918 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:16.927 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:16.935
2025-07-01 05:44:16.946 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:16.954 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:16.960 alo = 13, ahi = 1101
2025-07-01 05:44:16.966 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:16.972 blo = 13, bhi = 1101
2025-07-01 05:44:16.979
2025-07-01 05:44:16.988 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:16.996 g = []
2025-07-01 05:44:17.003 if alo < ahi:
2025-07-01 05:44:17.010 if blo < bhi:
2025-07-01 05:44:17.019 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:17.028 else:
2025-07-01 05:44:17.035 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:17.043 elif blo < bhi:
2025-07-01 05:44:17.054 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:17.062
2025-07-01 05:44:17.070 > yield from g
2025-07-01 05:44:17.077
2025-07-01 05:44:17.083 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:17.090 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:17.095
2025-07-01 05:44:17.107 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:17.119 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:17.130 alo = 13, ahi = 1101
2025-07-01 05:44:17.140 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:17.153 blo = 13, bhi = 1101
2025-07-01 05:44:17.163
2025-07-01 05:44:17.169 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:17.175 r"""
2025-07-01 05:44:17.181 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:17.187 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:17.193 synch point, and intraline difference marking is done on the
2025-07-01 05:44:17.200 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:17.205
2025-07-01 05:44:17.211 Example:
2025-07-01 05:44:17.216
2025-07-01 05:44:17.221 >>> d = Differ()
2025-07-01 05:44:17.232 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:17.241 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:17.248 >>> print(''.join(results), end="")
2025-07-01 05:44:17.255 - abcDefghiJkl
2025-07-01 05:44:17.267 + abcdefGhijkl
2025-07-01 05:44:17.280 """
2025-07-01 05:44:17.287
2025-07-01 05:44:17.295 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:17.303 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:17.309 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:17.315 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:17.320 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:17.325
2025-07-01 05:44:17.330 # search for the pair that matches best without being identical
2025-07-01 05:44:17.336 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:17.341 # on junk -- unless we have to)
2025-07-01 05:44:17.348 for j in range(blo, bhi):
2025-07-01 05:44:17.355 bj = b[j]
2025-07-01 05:44:17.362 cruncher.set_seq2(bj)
2025-07-01 05:44:17.373 for i in range(alo, ahi):
2025-07-01 05:44:17.384 ai = a[i]
2025-07-01 05:44:17.392 if ai == bj:
2025-07-01 05:44:17.398 if eqi is None:
2025-07-01 05:44:17.404 eqi, eqj = i, j
2025-07-01 05:44:17.409 continue
2025-07-01 05:44:17.415 cruncher.set_seq1(ai)
2025-07-01 05:44:17.425 # computing similarity is expensive, so use the quick
2025-07-01 05:44:17.439 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:17.450 # compares by a factor of 3.
2025-07-01 05:44:17.459 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:17.465 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:17.474 # of the computation is cached by cruncher
2025-07-01 05:44:17.485 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:17.493 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:17.500 cruncher.ratio() > best_ratio:
2025-07-01 05:44:17.510 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:17.522 if best_ratio < cutoff:
2025-07-01 05:44:17.531 # no non-identical "pretty close" pair
2025-07-01 05:44:17.539 if eqi is None:
2025-07-01 05:44:17.546 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:17.553 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:17.559 return
2025-07-01 05:44:17.565 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:17.577 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:17.587 else:
2025-07-01 05:44:17.597 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:17.605 eqi = None
2025-07-01 05:44:17.612
2025-07-01 05:44:17.619 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:17.625 # identical
2025-07-01 05:44:17.631
2025-07-01 05:44:17.637 # pump out diffs from before the synch point
2025-07-01 05:44:17.643 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:17.648
2025-07-01 05:44:17.654 # do intraline marking on the synch pair
2025-07-01 05:44:17.662 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:17.669 if eqi is None:
2025-07-01 05:44:17.677 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:17.686 atags = btags = ""
2025-07-01 05:44:17.694 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:17.701 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:17.708 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:17.717 if tag == 'replace':
2025-07-01 05:44:17.728 atags += '^' * la
2025-07-01 05:44:17.739 btags += '^' * lb
2025-07-01 05:44:17.750 elif tag == 'delete':
2025-07-01 05:44:17.761 atags += '-' * la
2025-07-01 05:44:17.770 elif tag == 'insert':
2025-07-01 05:44:17.781 btags += '+' * lb
2025-07-01 05:44:17.794 elif tag == 'equal':
2025-07-01 05:44:17.804 atags += ' ' * la
2025-07-01 05:44:17.815 btags += ' ' * lb
2025-07-01 05:44:17.824 else:
2025-07-01 05:44:17.832 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:17.840 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:17.846 else:
2025-07-01 05:44:17.857 # the synch pair is identical
2025-07-01 05:44:17.865 yield ' ' + aelt
2025-07-01 05:44:17.875
2025-07-01 05:44:17.887 # pump out diffs from after the synch point
2025-07-01 05:44:17.896 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:17.902
2025-07-01 05:44:17.909 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:17.921 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:17.931
2025-07-01 05:44:17.944 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:17.958 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:17.968 alo = 14, ahi = 1101
2025-07-01 05:44:17.979 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:17.986 blo = 14, bhi = 1101
2025-07-01 05:44:17.996
2025-07-01 05:44:18.006 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:18.018 g = []
2025-07-01 05:44:18.030 if alo < ahi:
2025-07-01 05:44:18.040 if blo < bhi:
2025-07-01 05:44:18.053 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:18.062 else:
2025-07-01 05:44:18.069 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:18.075 elif blo < bhi:
2025-07-01 05:44:18.081 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:18.087
2025-07-01 05:44:18.093 > yield from g
2025-07-01 05:44:18.099
2025-07-01 05:44:18.105 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:18.111 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:18.117
2025-07-01 05:44:18.123 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:18.129 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:18.136 alo = 14, ahi = 1101
2025-07-01 05:44:18.144 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:18.150 blo = 14, bhi = 1101
2025-07-01 05:44:18.156
2025-07-01 05:44:18.163 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:18.169 r"""
2025-07-01 05:44:18.174 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:18.180 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:18.186 synch point, and intraline difference marking is done on the
2025-07-01 05:44:18.192 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:18.198
2025-07-01 05:44:18.208 Example:
2025-07-01 05:44:18.216
2025-07-01 05:44:18.223 >>> d = Differ()
2025-07-01 05:44:18.230 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:18.235 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:18.241 >>> print(''.join(results), end="")
2025-07-01 05:44:18.246 - abcDefghiJkl
2025-07-01 05:44:18.264 + abcdefGhijkl
2025-07-01 05:44:18.285 """
2025-07-01 05:44:18.296
2025-07-01 05:44:18.305 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:18.315 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:18.323 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:18.333 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:18.346 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:18.356
2025-07-01 05:44:18.365 # search for the pair that matches best without being identical
2025-07-01 05:44:18.373 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:18.379 # on junk -- unless we have to)
2025-07-01 05:44:18.385 for j in range(blo, bhi):
2025-07-01 05:44:18.395 bj = b[j]
2025-07-01 05:44:18.407 cruncher.set_seq2(bj)
2025-07-01 05:44:18.416 for i in range(alo, ahi):
2025-07-01 05:44:18.423 ai = a[i]
2025-07-01 05:44:18.431 if ai == bj:
2025-07-01 05:44:18.441 if eqi is None:
2025-07-01 05:44:18.451 eqi, eqj = i, j
2025-07-01 05:44:18.460 continue
2025-07-01 05:44:18.471 cruncher.set_seq1(ai)
2025-07-01 05:44:18.483 # computing similarity is expensive, so use the quick
2025-07-01 05:44:18.497 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:18.510 # compares by a factor of 3.
2025-07-01 05:44:18.519 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:18.526 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:18.533 # of the computation is cached by cruncher
2025-07-01 05:44:18.538 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:18.544 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:18.549 cruncher.ratio() > best_ratio:
2025-07-01 05:44:18.554 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:18.559 if best_ratio < cutoff:
2025-07-01 05:44:18.564 # no non-identical "pretty close" pair
2025-07-01 05:44:18.569 if eqi is None:
2025-07-01 05:44:18.577 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:18.585 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:18.592 return
2025-07-01 05:44:18.599 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:18.606 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:18.610 else:
2025-07-01 05:44:18.615 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:18.620 eqi = None
2025-07-01 05:44:18.624
2025-07-01 05:44:18.629 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:18.634 # identical
2025-07-01 05:44:18.638
2025-07-01 05:44:18.643 # pump out diffs from before the synch point
2025-07-01 05:44:18.648 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:18.653
2025-07-01 05:44:18.657 # do intraline marking on the synch pair
2025-07-01 05:44:18.662 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:18.667 if eqi is None:
2025-07-01 05:44:18.675 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:18.687 atags = btags = ""
2025-07-01 05:44:18.695 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:18.702 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:18.708 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:18.713 if tag == 'replace':
2025-07-01 05:44:18.719 atags += '^' * la
2025-07-01 05:44:18.726 btags += '^' * lb
2025-07-01 05:44:18.739 elif tag == 'delete':
2025-07-01 05:44:18.751 atags += '-' * la
2025-07-01 05:44:18.760 elif tag == 'insert':
2025-07-01 05:44:18.768 btags += '+' * lb
2025-07-01 05:44:18.774 elif tag == 'equal':
2025-07-01 05:44:18.785 atags += ' ' * la
2025-07-01 05:44:18.796 btags += ' ' * lb
2025-07-01 05:44:18.805 else:
2025-07-01 05:44:18.817 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:18.826 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:18.834 else:
2025-07-01 05:44:18.843 # the synch pair is identical
2025-07-01 05:44:18.853 yield ' ' + aelt
2025-07-01 05:44:18.862
2025-07-01 05:44:18.873 # pump out diffs from after the synch point
2025-07-01 05:44:18.882 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:18.893
2025-07-01 05:44:18.904 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:18.915 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:18.923
2025-07-01 05:44:18.935 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:18.946 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:18.958 alo = 15, ahi = 1101
2025-07-01 05:44:18.971 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:18.979 blo = 15, bhi = 1101
2025-07-01 05:44:18.988
2025-07-01 05:44:18.998 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:19.009 g = []
2025-07-01 05:44:19.019 if alo < ahi:
2025-07-01 05:44:19.028 if blo < bhi:
2025-07-01 05:44:19.035 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:19.043 else:
2025-07-01 05:44:19.054 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:19.066 elif blo < bhi:
2025-07-01 05:44:19.076 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:19.088
2025-07-01 05:44:19.100 > yield from g
2025-07-01 05:44:19.110
2025-07-01 05:44:19.119 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:19.126 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:19.139
2025-07-01 05:44:19.149 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:19.159 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:19.171 alo = 15, ahi = 1101
2025-07-01 05:44:19.181 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:19.187 blo = 15, bhi = 1101
2025-07-01 05:44:19.192
2025-07-01 05:44:19.197 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:19.201 r"""
2025-07-01 05:44:19.205 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:19.210 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:19.214 synch point, and intraline difference marking is done on the
2025-07-01 05:44:19.219 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:19.224
2025-07-01 05:44:19.228 Example:
2025-07-01 05:44:19.233
2025-07-01 05:44:19.237 >>> d = Differ()
2025-07-01 05:44:19.242 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:19.252 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:19.262 >>> print(''.join(results), end="")
2025-07-01 05:44:19.273 - abcDefghiJkl
2025-07-01 05:44:19.293 + abcdefGhijkl
2025-07-01 05:44:19.315 """
2025-07-01 05:44:19.324
2025-07-01 05:44:19.330 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:19.339 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:19.344 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:19.349 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:19.354 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:19.359
2025-07-01 05:44:19.367 # search for the pair that matches best without being identical
2025-07-01 05:44:19.379 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:19.387 # on junk -- unless we have to)
2025-07-01 05:44:19.394 for j in range(blo, bhi):
2025-07-01 05:44:19.402 bj = b[j]
2025-07-01 05:44:19.408 cruncher.set_seq2(bj)
2025-07-01 05:44:19.415 for i in range(alo, ahi):
2025-07-01 05:44:19.420 ai = a[i]
2025-07-01 05:44:19.426 if ai == bj:
2025-07-01 05:44:19.432 if eqi is None:
2025-07-01 05:44:19.438 eqi, eqj = i, j
2025-07-01 05:44:19.445 continue
2025-07-01 05:44:19.457 cruncher.set_seq1(ai)
2025-07-01 05:44:19.468 # computing similarity is expensive, so use the quick
2025-07-01 05:44:19.479 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:19.488 # compares by a factor of 3.
2025-07-01 05:44:19.495 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:19.508 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:19.520 # of the computation is cached by cruncher
2025-07-01 05:44:19.530 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:19.541 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:19.551 cruncher.ratio() > best_ratio:
2025-07-01 05:44:19.565 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:19.577 if best_ratio < cutoff:
2025-07-01 05:44:19.588 # no non-identical "pretty close" pair
2025-07-01 05:44:19.601 if eqi is None:
2025-07-01 05:44:19.611 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:19.623 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:19.634 return
2025-07-01 05:44:19.644 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:19.657 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:19.665 else:
2025-07-01 05:44:19.672 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:19.680 eqi = None
2025-07-01 05:44:19.692
2025-07-01 05:44:19.702 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:19.711 # identical
2025-07-01 05:44:19.718
2025-07-01 05:44:19.731 # pump out diffs from before the synch point
2025-07-01 05:44:19.742 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:19.751
2025-07-01 05:44:19.758 # do intraline marking on the synch pair
2025-07-01 05:44:19.771 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:19.783 if eqi is None:
2025-07-01 05:44:19.794 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:19.803 atags = btags = ""
2025-07-01 05:44:19.812 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:19.819 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:19.825 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:19.831 if tag == 'replace':
2025-07-01 05:44:19.837 atags += '^' * la
2025-07-01 05:44:19.843 btags += '^' * lb
2025-07-01 05:44:19.865 elif tag == 'delete':
2025-07-01 05:44:19.873 atags += '-' * la
2025-07-01 05:44:19.879 elif tag == 'insert':
2025-07-01 05:44:19.885 btags += '+' * lb
2025-07-01 05:44:19.889 elif tag == 'equal':
2025-07-01 05:44:19.894 atags += ' ' * la
2025-07-01 05:44:19.898 btags += ' ' * lb
2025-07-01 05:44:19.904 else:
2025-07-01 05:44:19.910 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:19.915 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:19.922 else:
2025-07-01 05:44:19.933 # the synch pair is identical
2025-07-01 05:44:19.943 yield ' ' + aelt
2025-07-01 05:44:19.953
2025-07-01 05:44:19.967 # pump out diffs from after the synch point
2025-07-01 05:44:19.980 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:19.991
2025-07-01 05:44:20.002 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:20.013 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:20.024
2025-07-01 05:44:20.031 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:20.038 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:20.044 alo = 16, ahi = 1101
2025-07-01 05:44:20.049 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:20.054 blo = 16, bhi = 1101
2025-07-01 05:44:20.059
2025-07-01 05:44:20.064 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:20.069 g = []
2025-07-01 05:44:20.073 if alo < ahi:
2025-07-01 05:44:20.079 if blo < bhi:
2025-07-01 05:44:20.084 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:20.090 else:
2025-07-01 05:44:20.098 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:20.105 elif blo < bhi:
2025-07-01 05:44:20.116 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:20.127
2025-07-01 05:44:20.133 > yield from g
2025-07-01 05:44:20.139
2025-07-01 05:44:20.146 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:20.153 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:20.160
2025-07-01 05:44:20.168 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:20.176 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:20.183 alo = 16, ahi = 1101
2025-07-01 05:44:20.191 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:20.199 blo = 16, bhi = 1101
2025-07-01 05:44:20.210
2025-07-01 05:44:20.218 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:20.225 r"""
2025-07-01 05:44:20.231 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:20.236 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:20.242 synch point, and intraline difference marking is done on the
2025-07-01 05:44:20.247 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:20.253
2025-07-01 05:44:20.258 Example:
2025-07-01 05:44:20.266
2025-07-01 05:44:20.274 >>> d = Differ()
2025-07-01 05:44:20.281 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:20.288 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:20.294 >>> print(''.join(results), end="")
2025-07-01 05:44:20.300 - abcDefghiJkl
2025-07-01 05:44:20.313 + abcdefGhijkl
2025-07-01 05:44:20.327 """
2025-07-01 05:44:20.339
2025-07-01 05:44:20.346 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:20.352 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:20.359 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:20.365 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:20.377 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:20.386
2025-07-01 05:44:20.392 # search for the pair that matches best without being identical
2025-07-01 05:44:20.399 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:20.407 # on junk -- unless we have to)
2025-07-01 05:44:20.413 for j in range(blo, bhi):
2025-07-01 05:44:20.418 bj = b[j]
2025-07-01 05:44:20.426 cruncher.set_seq2(bj)
2025-07-01 05:44:20.432 for i in range(alo, ahi):
2025-07-01 05:44:20.438 ai = a[i]
2025-07-01 05:44:20.443 if ai == bj:
2025-07-01 05:44:20.449 if eqi is None:
2025-07-01 05:44:20.455 eqi, eqj = i, j
2025-07-01 05:44:20.461 continue
2025-07-01 05:44:20.468 cruncher.set_seq1(ai)
2025-07-01 05:44:20.474 # computing similarity is expensive, so use the quick
2025-07-01 05:44:20.482 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:20.488 # compares by a factor of 3.
2025-07-01 05:44:20.498 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:20.511 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:20.520 # of the computation is cached by cruncher
2025-07-01 05:44:20.530 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:20.540 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:20.548 cruncher.ratio() > best_ratio:
2025-07-01 05:44:20.555 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:20.562 if best_ratio < cutoff:
2025-07-01 05:44:20.568 # no non-identical "pretty close" pair
2025-07-01 05:44:20.574 if eqi is None:
2025-07-01 05:44:20.583 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:20.595 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:20.603 return
2025-07-01 05:44:20.610 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:20.619 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:20.625 else:
2025-07-01 05:44:20.632 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:20.638 eqi = None
2025-07-01 05:44:20.645
2025-07-01 05:44:20.652 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:20.659 # identical
2025-07-01 05:44:20.667
2025-07-01 05:44:20.678 # pump out diffs from before the synch point
2025-07-01 05:44:20.687 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:20.694
2025-07-01 05:44:20.699 # do intraline marking on the synch pair
2025-07-01 05:44:20.704 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:20.709 if eqi is None:
2025-07-01 05:44:20.715 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:20.721 atags = btags = ""
2025-07-01 05:44:20.727 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:20.734 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:20.742 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:20.753 if tag == 'replace':
2025-07-01 05:44:20.762 atags += '^' * la
2025-07-01 05:44:20.774 btags += '^' * lb
2025-07-01 05:44:20.783 elif tag == 'delete':
2025-07-01 05:44:20.790 atags += '-' * la
2025-07-01 05:44:20.796 elif tag == 'insert':
2025-07-01 05:44:20.803 btags += '+' * lb
2025-07-01 05:44:20.815 elif tag == 'equal':
2025-07-01 05:44:20.824 atags += ' ' * la
2025-07-01 05:44:20.832 btags += ' ' * lb
2025-07-01 05:44:20.839 else:
2025-07-01 05:44:20.845 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:20.851 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:20.858 else:
2025-07-01 05:44:20.869 # the synch pair is identical
2025-07-01 05:44:20.877 yield ' ' + aelt
2025-07-01 05:44:20.884
2025-07-01 05:44:20.890 # pump out diffs from after the synch point
2025-07-01 05:44:20.898 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:20.905
2025-07-01 05:44:20.911 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:20.917 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:20.923
2025-07-01 05:44:20.928 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:20.935 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:20.942 alo = 17, ahi = 1101
2025-07-01 05:44:20.950 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:20.957 blo = 17, bhi = 1101
2025-07-01 05:44:20.963
2025-07-01 05:44:20.970 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:20.976 g = []
2025-07-01 05:44:20.983 if alo < ahi:
2025-07-01 05:44:20.989 if blo < bhi:
2025-07-01 05:44:21.001 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:21.011 else:
2025-07-01 05:44:21.019 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:21.028 elif blo < bhi:
2025-07-01 05:44:21.038 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:21.044
2025-07-01 05:44:21.050 > yield from g
2025-07-01 05:44:21.057
2025-07-01 05:44:21.063 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:21.070 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:21.077
2025-07-01 05:44:21.084 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:21.092 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:21.098 alo = 17, ahi = 1101
2025-07-01 05:44:21.106 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:21.113 blo = 17, bhi = 1101
2025-07-01 05:44:21.119
2025-07-01 05:44:21.126 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:21.136 r"""
2025-07-01 05:44:21.147 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:21.156 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:21.162 synch point, and intraline difference marking is done on the
2025-07-01 05:44:21.168 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:21.174
2025-07-01 05:44:21.182 Example:
2025-07-01 05:44:21.191
2025-07-01 05:44:21.202 >>> d = Differ()
2025-07-01 05:44:21.215 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:21.226 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:21.238 >>> print(''.join(results), end="")
2025-07-01 05:44:21.250 - abcDefghiJkl
2025-07-01 05:44:21.269 + abcdefGhijkl
2025-07-01 05:44:21.282 """
2025-07-01 05:44:21.288
2025-07-01 05:44:21.294 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:21.305 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:21.317 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:21.326 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:21.333 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:21.340
2025-07-01 05:44:21.347 # search for the pair that matches best without being identical
2025-07-01 05:44:21.353 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:21.358 # on junk -- unless we have to)
2025-07-01 05:44:21.364 for j in range(blo, bhi):
2025-07-01 05:44:21.378 bj = b[j]
2025-07-01 05:44:21.389 cruncher.set_seq2(bj)
2025-07-01 05:44:21.396 for i in range(alo, ahi):
2025-07-01 05:44:21.403 ai = a[i]
2025-07-01 05:44:21.409 if ai == bj:
2025-07-01 05:44:21.415 if eqi is None:
2025-07-01 05:44:21.420 eqi, eqj = i, j
2025-07-01 05:44:21.425 continue
2025-07-01 05:44:21.431 cruncher.set_seq1(ai)
2025-07-01 05:44:21.439 # computing similarity is expensive, so use the quick
2025-07-01 05:44:21.449 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:21.457 # compares by a factor of 3.
2025-07-01 05:44:21.464 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:21.469 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:21.476 # of the computation is cached by cruncher
2025-07-01 05:44:21.484 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:21.489 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:21.495 cruncher.ratio() > best_ratio:
2025-07-01 05:44:21.500 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:21.506 if best_ratio < cutoff:
2025-07-01 05:44:21.519 # no non-identical "pretty close" pair
2025-07-01 05:44:21.529 if eqi is None:
2025-07-01 05:44:21.537 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:21.551 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:21.557 return
2025-07-01 05:44:21.564 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:21.570 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:21.575 else:
2025-07-01 05:44:21.580 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:21.586 eqi = None
2025-07-01 05:44:21.591
2025-07-01 05:44:21.597 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:21.603 # identical
2025-07-01 05:44:21.611
2025-07-01 05:44:21.622 # pump out diffs from before the synch point
2025-07-01 05:44:21.630 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:21.637
2025-07-01 05:44:21.643 # do intraline marking on the synch pair
2025-07-01 05:44:21.648 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:21.653 if eqi is None:
2025-07-01 05:44:21.658 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:21.663 atags = btags = ""
2025-07-01 05:44:21.669 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:21.675 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:21.680 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:21.685 if tag == 'replace':
2025-07-01 05:44:21.691 atags += '^' * la
2025-07-01 05:44:21.697 btags += '^' * lb
2025-07-01 05:44:21.703 elif tag == 'delete':
2025-07-01 05:44:21.710 atags += '-' * la
2025-07-01 05:44:21.721 elif tag == 'insert':
2025-07-01 05:44:21.731 btags += '+' * lb
2025-07-01 05:44:21.739 elif tag == 'equal':
2025-07-01 05:44:21.746 atags += ' ' * la
2025-07-01 05:44:21.752 btags += ' ' * lb
2025-07-01 05:44:21.758 else:
2025-07-01 05:44:21.764 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:21.770 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:21.775 else:
2025-07-01 05:44:21.780 # the synch pair is identical
2025-07-01 05:44:21.786 yield ' ' + aelt
2025-07-01 05:44:21.792
2025-07-01 05:44:21.798 # pump out diffs from after the synch point
2025-07-01 05:44:21.804 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:21.810
2025-07-01 05:44:21.822 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:21.832 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:21.842
2025-07-01 05:44:21.852 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:21.866 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:21.874 alo = 18, ahi = 1101
2025-07-01 05:44:21.882 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:21.889 blo = 18, bhi = 1101
2025-07-01 05:44:21.895
2025-07-01 05:44:21.900 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:21.906 g = []
2025-07-01 05:44:21.916 if alo < ahi:
2025-07-01 05:44:21.923 if blo < bhi:
2025-07-01 05:44:21.930 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:21.936 else:
2025-07-01 05:44:21.943 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:21.952 elif blo < bhi:
2025-07-01 05:44:21.962 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:21.968
2025-07-01 05:44:21.973 > yield from g
2025-07-01 05:44:21.978
2025-07-01 05:44:21.983 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:21.989 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:21.994
2025-07-01 05:44:22.005 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:22.017 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:22.029 alo = 18, ahi = 1101
2025-07-01 05:44:22.039 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:22.045 blo = 18, bhi = 1101
2025-07-01 05:44:22.050
2025-07-01 05:44:22.055 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:22.059 r"""
2025-07-01 05:44:22.064 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:22.069 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:22.075 synch point, and intraline difference marking is done on the
2025-07-01 05:44:22.080 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:22.086
2025-07-01 05:44:22.091 Example:
2025-07-01 05:44:22.096
2025-07-01 05:44:22.102 >>> d = Differ()
2025-07-01 05:44:22.112 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:22.121 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:22.129 >>> print(''.join(results), end="")
2025-07-01 05:44:22.135 - abcDefghiJkl
2025-07-01 05:44:22.153 + abcdefGhijkl
2025-07-01 05:44:22.166 """
2025-07-01 05:44:22.172
2025-07-01 05:44:22.185 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:22.195 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:22.204 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:22.211 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:22.220 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:22.231
2025-07-01 05:44:22.240 # search for the pair that matches best without being identical
2025-07-01 05:44:22.248 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:22.260 # on junk -- unless we have to)
2025-07-01 05:44:22.268 for j in range(blo, bhi):
2025-07-01 05:44:22.279 bj = b[j]
2025-07-01 05:44:22.288 cruncher.set_seq2(bj)
2025-07-01 05:44:22.296 for i in range(alo, ahi):
2025-07-01 05:44:22.302 ai = a[i]
2025-07-01 05:44:22.308 if ai == bj:
2025-07-01 05:44:22.314 if eqi is None:
2025-07-01 05:44:22.320 eqi, eqj = i, j
2025-07-01 05:44:22.327 continue
2025-07-01 05:44:22.338 cruncher.set_seq1(ai)
2025-07-01 05:44:22.346 # computing similarity is expensive, so use the quick
2025-07-01 05:44:22.353 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:22.358 # compares by a factor of 3.
2025-07-01 05:44:22.365 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:22.371 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:22.378 # of the computation is cached by cruncher
2025-07-01 05:44:22.384 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:22.391 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:22.398 cruncher.ratio() > best_ratio:
2025-07-01 05:44:22.404 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:22.410 if best_ratio < cutoff:
2025-07-01 05:44:22.416 # no non-identical "pretty close" pair
2025-07-01 05:44:22.422 if eqi is None:
2025-07-01 05:44:22.428 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:22.435 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:22.446 return
2025-07-01 05:44:22.454 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:22.460 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:22.466 else:
2025-07-01 05:44:22.472 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:22.478 eqi = None
2025-07-01 05:44:22.487
2025-07-01 05:44:22.495 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:22.505 # identical
2025-07-01 05:44:22.515
2025-07-01 05:44:22.526 # pump out diffs from before the synch point
2025-07-01 05:44:22.537 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:22.544
2025-07-01 05:44:22.549 # do intraline marking on the synch pair
2025-07-01 05:44:22.555 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:22.563 if eqi is None:
2025-07-01 05:44:22.574 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:22.582 atags = btags = ""
2025-07-01 05:44:22.588 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:22.594 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:22.600 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:22.606 if tag == 'replace':
2025-07-01 05:44:22.611 atags += '^' * la
2025-07-01 05:44:22.617 btags += '^' * lb
2025-07-01 05:44:22.623 elif tag == 'delete':
2025-07-01 05:44:22.629 atags += '-' * la
2025-07-01 05:44:22.636 elif tag == 'insert':
2025-07-01 05:44:22.642 btags += '+' * lb
2025-07-01 05:44:22.655 elif tag == 'equal':
2025-07-01 05:44:22.663 atags += ' ' * la
2025-07-01 05:44:22.670 btags += ' ' * lb
2025-07-01 05:44:22.676 else:
2025-07-01 05:44:22.681 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:22.691 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:22.698 else:
2025-07-01 05:44:22.706 # the synch pair is identical
2025-07-01 05:44:22.711 yield ' ' + aelt
2025-07-01 05:44:22.718
2025-07-01 05:44:22.729 # pump out diffs from after the synch point
2025-07-01 05:44:22.738 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:22.746
2025-07-01 05:44:22.751 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:22.757 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:22.763
2025-07-01 05:44:22.768 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:22.774 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:22.785 alo = 19, ahi = 1101
2025-07-01 05:44:22.793 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:22.800 blo = 19, bhi = 1101
2025-07-01 05:44:22.805
2025-07-01 05:44:22.811 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:22.816 g = []
2025-07-01 05:44:22.823 if alo < ahi:
2025-07-01 05:44:22.830 if blo < bhi:
2025-07-01 05:44:22.839 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:22.845 else:
2025-07-01 05:44:22.852 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:22.858 elif blo < bhi:
2025-07-01 05:44:22.866 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:22.876
2025-07-01 05:44:22.885 > yield from g
2025-07-01 05:44:22.891
2025-07-01 05:44:22.897 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:22.902 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:22.907
2025-07-01 05:44:22.911 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:22.917 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:22.921 alo = 19, ahi = 1101
2025-07-01 05:44:22.926 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:22.931 blo = 19, bhi = 1101
2025-07-01 05:44:22.936
2025-07-01 05:44:22.942 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:22.947 r"""
2025-07-01 05:44:22.954 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:22.961 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:22.969 synch point, and intraline difference marking is done on the
2025-07-01 05:44:22.974 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:22.980
2025-07-01 05:44:22.986 Example:
2025-07-01 05:44:22.991
2025-07-01 05:44:22.997 >>> d = Differ()
2025-07-01 05:44:23.004 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:23.013 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:23.020 >>> print(''.join(results), end="")
2025-07-01 05:44:23.027 - abcDefghiJkl
2025-07-01 05:44:23.038 + abcdefGhijkl
2025-07-01 05:44:23.050 """
2025-07-01 05:44:23.059
2025-07-01 05:44:23.066 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:23.072 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:23.079 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:23.085 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:23.090 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:23.096
2025-07-01 05:44:23.102 # search for the pair that matches best without being identical
2025-07-01 05:44:23.111 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:23.117 # on junk -- unless we have to)
2025-07-01 05:44:23.123 for j in range(blo, bhi):
2025-07-01 05:44:23.128 bj = b[j]
2025-07-01 05:44:23.133 cruncher.set_seq2(bj)
2025-07-01 05:44:23.139 for i in range(alo, ahi):
2025-07-01 05:44:23.145 ai = a[i]
2025-07-01 05:44:23.153 if ai == bj:
2025-07-01 05:44:23.158 if eqi is None:
2025-07-01 05:44:23.164 eqi, eqj = i, j
2025-07-01 05:44:23.169 continue
2025-07-01 05:44:23.177 cruncher.set_seq1(ai)
2025-07-01 05:44:23.186 # computing similarity is expensive, so use the quick
2025-07-01 05:44:23.192 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:23.199 # compares by a factor of 3.
2025-07-01 05:44:23.207 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:23.219 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:23.227 # of the computation is cached by cruncher
2025-07-01 05:44:23.234 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:23.239 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:23.245 cruncher.ratio() > best_ratio:
2025-07-01 05:44:23.252 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:23.258 if best_ratio < cutoff:
2025-07-01 05:44:23.264 # no non-identical "pretty close" pair
2025-07-01 05:44:23.269 if eqi is None:
2025-07-01 05:44:23.274 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:23.279 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:23.284 return
2025-07-01 05:44:23.290 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:23.295 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:23.300 else:
2025-07-01 05:44:23.306 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:23.313 eqi = None
2025-07-01 05:44:23.321
2025-07-01 05:44:23.328 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:23.334 # identical
2025-07-01 05:44:23.339
2025-07-01 05:44:23.344 # pump out diffs from before the synch point
2025-07-01 05:44:23.348 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:23.352
2025-07-01 05:44:23.357 # do intraline marking on the synch pair
2025-07-01 05:44:23.361 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:23.365 if eqi is None:
2025-07-01 05:44:23.370 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:23.374 atags = btags = ""
2025-07-01 05:44:23.379 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:23.385 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:23.390 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:23.396 if tag == 'replace':
2025-07-01 05:44:23.403 atags += '^' * la
2025-07-01 05:44:23.411 btags += '^' * lb
2025-07-01 05:44:23.422 elif tag == 'delete':
2025-07-01 05:44:23.429 atags += '-' * la
2025-07-01 05:44:23.437 elif tag == 'insert':
2025-07-01 05:44:23.448 btags += '+' * lb
2025-07-01 05:44:23.459 elif tag == 'equal':
2025-07-01 05:44:23.468 atags += ' ' * la
2025-07-01 05:44:23.475 btags += ' ' * lb
2025-07-01 05:44:23.481 else:
2025-07-01 05:44:23.487 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:23.492 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:23.497 else:
2025-07-01 05:44:23.503 # the synch pair is identical
2025-07-01 05:44:23.509 yield ' ' + aelt
2025-07-01 05:44:23.516
2025-07-01 05:44:23.523 # pump out diffs from after the synch point
2025-07-01 05:44:23.531 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:23.538
2025-07-01 05:44:23.549 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:23.558 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:23.565
2025-07-01 05:44:23.572 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:23.579 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:23.588 alo = 20, ahi = 1101
2025-07-01 05:44:23.596 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:23.608 blo = 20, bhi = 1101
2025-07-01 05:44:23.618
2025-07-01 05:44:23.625 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:23.632 g = []
2025-07-01 05:44:23.639 if alo < ahi:
2025-07-01 05:44:23.647 if blo < bhi:
2025-07-01 05:44:23.658 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:23.666 else:
2025-07-01 05:44:23.673 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:23.683 elif blo < bhi:
2025-07-01 05:44:23.693 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:23.700
2025-07-01 05:44:23.706 > yield from g
2025-07-01 05:44:23.711
2025-07-01 05:44:23.717 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:23.722 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:23.727
2025-07-01 05:44:23.733 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:23.739 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:23.745 alo = 20, ahi = 1101
2025-07-01 05:44:23.751 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:23.757 blo = 20, bhi = 1101
2025-07-01 05:44:23.762
2025-07-01 05:44:23.768 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:23.775 r"""
2025-07-01 05:44:23.786 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:23.797 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:23.805 synch point, and intraline difference marking is done on the
2025-07-01 05:44:23.815 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:23.822
2025-07-01 05:44:23.833 Example:
2025-07-01 05:44:23.845
2025-07-01 05:44:23.858 >>> d = Differ()
2025-07-01 05:44:23.867 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:23.873 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:23.879 >>> print(''.join(results), end="")
2025-07-01 05:44:23.885 - abcDefghiJkl
2025-07-01 05:44:23.902 + abcdefGhijkl
2025-07-01 05:44:23.919 """
2025-07-01 05:44:23.931
2025-07-01 05:44:23.944 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:23.957 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:23.966 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:23.974 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:23.981 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:23.987
2025-07-01 05:44:23.995 # search for the pair that matches best without being identical
2025-07-01 05:44:24.002 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:24.012 # on junk -- unless we have to)
2025-07-01 05:44:24.021 for j in range(blo, bhi):
2025-07-01 05:44:24.027 bj = b[j]
2025-07-01 05:44:24.034 cruncher.set_seq2(bj)
2025-07-01 05:44:24.043 for i in range(alo, ahi):
2025-07-01 05:44:24.052 ai = a[i]
2025-07-01 05:44:24.059 if ai == bj:
2025-07-01 05:44:24.067 if eqi is None:
2025-07-01 05:44:24.076 eqi, eqj = i, j
2025-07-01 05:44:24.084 continue
2025-07-01 05:44:24.096 cruncher.set_seq1(ai)
2025-07-01 05:44:24.106 # computing similarity is expensive, so use the quick
2025-07-01 05:44:24.117 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:24.125 # compares by a factor of 3.
2025-07-01 05:44:24.133 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:24.140 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:24.148 # of the computation is cached by cruncher
2025-07-01 05:44:24.156 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:24.164 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:24.171 cruncher.ratio() > best_ratio:
2025-07-01 05:44:24.177 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:24.183 if best_ratio < cutoff:
2025-07-01 05:44:24.190 # no non-identical "pretty close" pair
2025-07-01 05:44:24.198 if eqi is None:
2025-07-01 05:44:24.206 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:24.213 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:24.219 return
2025-07-01 05:44:24.225 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:24.231 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:24.237 else:
2025-07-01 05:44:24.243 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:24.249 eqi = None
2025-07-01 05:44:24.259
2025-07-01 05:44:24.268 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:24.278 # identical
2025-07-01 05:44:24.286
2025-07-01 05:44:24.292 # pump out diffs from before the synch point
2025-07-01 05:44:24.298 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:24.304
2025-07-01 05:44:24.308 # do intraline marking on the synch pair
2025-07-01 05:44:24.313 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:24.321 if eqi is None:
2025-07-01 05:44:24.328 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:24.334 atags = btags = ""
2025-07-01 05:44:24.339 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:24.345 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:24.357 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:24.368 if tag == 'replace':
2025-07-01 05:44:24.378 atags += '^' * la
2025-07-01 05:44:24.391 btags += '^' * lb
2025-07-01 05:44:24.401 elif tag == 'delete':
2025-07-01 05:44:24.410 atags += '-' * la
2025-07-01 05:44:24.421 elif tag == 'insert':
2025-07-01 05:44:24.433 btags += '+' * lb
2025-07-01 05:44:24.441 elif tag == 'equal':
2025-07-01 05:44:24.449 atags += ' ' * la
2025-07-01 05:44:24.456 btags += ' ' * lb
2025-07-01 05:44:24.462 else:
2025-07-01 05:44:24.469 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:24.476 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:24.483 else:
2025-07-01 05:44:24.490 # the synch pair is identical
2025-07-01 05:44:24.501 yield ' ' + aelt
2025-07-01 05:44:24.511
2025-07-01 05:44:24.519 # pump out diffs from after the synch point
2025-07-01 05:44:24.527 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:24.535
2025-07-01 05:44:24.546 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:24.553 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:24.559
2025-07-01 05:44:24.566 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:24.574 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:24.586 alo = 21, ahi = 1101
2025-07-01 05:44:24.596 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:24.603 blo = 21, bhi = 1101
2025-07-01 05:44:24.609
2025-07-01 05:44:24.614 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:24.618 g = []
2025-07-01 05:44:24.623 if alo < ahi:
2025-07-01 05:44:24.628 if blo < bhi:
2025-07-01 05:44:24.634 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:24.639 else:
2025-07-01 05:44:24.645 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:24.652 elif blo < bhi:
2025-07-01 05:44:24.662 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:24.668
2025-07-01 05:44:24.674 > yield from g
2025-07-01 05:44:24.680
2025-07-01 05:44:24.685 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:24.691 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:24.702
2025-07-01 05:44:24.709 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:24.716 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:24.722 alo = 21, ahi = 1101
2025-07-01 05:44:24.731 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:24.737 blo = 21, bhi = 1101
2025-07-01 05:44:24.744
2025-07-01 05:44:24.750 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:24.757 r"""
2025-07-01 05:44:24.764 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:24.770 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:24.777 synch point, and intraline difference marking is done on the
2025-07-01 05:44:24.788 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:24.797
2025-07-01 05:44:24.807 Example:
2025-07-01 05:44:24.817
2025-07-01 05:44:24.826 >>> d = Differ()
2025-07-01 05:44:24.835 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:24.846 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:24.857 >>> print(''.join(results), end="")
2025-07-01 05:44:24.865 - abcDefghiJkl
2025-07-01 05:44:24.886 + abcdefGhijkl
2025-07-01 05:44:24.906 """
2025-07-01 05:44:24.915
2025-07-01 05:44:24.923 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:24.929 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:24.934 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:24.940 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:24.946 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:24.956
2025-07-01 05:44:24.965 # search for the pair that matches best without being identical
2025-07-01 05:44:24.973 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:24.979 # on junk -- unless we have to)
2025-07-01 05:44:24.991 for j in range(blo, bhi):
2025-07-01 05:44:25.001 bj = b[j]
2025-07-01 05:44:25.013 cruncher.set_seq2(bj)
2025-07-01 05:44:25.026 for i in range(alo, ahi):
2025-07-01 05:44:25.035 ai = a[i]
2025-07-01 05:44:25.043 if ai == bj:
2025-07-01 05:44:25.049 if eqi is None:
2025-07-01 05:44:25.055 eqi, eqj = i, j
2025-07-01 05:44:25.061 continue
2025-07-01 05:44:25.067 cruncher.set_seq1(ai)
2025-07-01 05:44:25.073 # computing similarity is expensive, so use the quick
2025-07-01 05:44:25.078 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:25.084 # compares by a factor of 3.
2025-07-01 05:44:25.090 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:25.096 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:25.102 # of the computation is cached by cruncher
2025-07-01 05:44:25.108 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:25.115 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:25.126 cruncher.ratio() > best_ratio:
2025-07-01 05:44:25.134 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:25.141 if best_ratio < cutoff:
2025-07-01 05:44:25.152 # no non-identical "pretty close" pair
2025-07-01 05:44:25.163 if eqi is None:
2025-07-01 05:44:25.172 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:25.180 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:25.187 return
2025-07-01 05:44:25.196 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:25.210 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:25.219 else:
2025-07-01 05:44:25.227 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:25.241 eqi = None
2025-07-01 05:44:25.253
2025-07-01 05:44:25.265 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:25.278 # identical
2025-07-01 05:44:25.289
2025-07-01 05:44:25.300 # pump out diffs from before the synch point
2025-07-01 05:44:25.311 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:25.323
2025-07-01 05:44:25.332 # do intraline marking on the synch pair
2025-07-01 05:44:25.345 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:25.356 if eqi is None:
2025-07-01 05:44:25.364 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:25.371 atags = btags = ""
2025-07-01 05:44:25.378 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:25.384 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:25.390 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:25.396 if tag == 'replace':
2025-07-01 05:44:25.401 atags += '^' * la
2025-07-01 05:44:25.407 btags += '^' * lb
2025-07-01 05:44:25.413 elif tag == 'delete':
2025-07-01 05:44:25.419 atags += '-' * la
2025-07-01 05:44:25.424 elif tag == 'insert':
2025-07-01 05:44:25.430 btags += '+' * lb
2025-07-01 05:44:25.440 elif tag == 'equal':
2025-07-01 05:44:25.451 atags += ' ' * la
2025-07-01 05:44:25.462 btags += ' ' * lb
2025-07-01 05:44:25.474 else:
2025-07-01 05:44:25.487 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:25.496 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:25.504 else:
2025-07-01 05:44:25.510 # the synch pair is identical
2025-07-01 05:44:25.515 yield ' ' + aelt
2025-07-01 05:44:25.520
2025-07-01 05:44:25.526 # pump out diffs from after the synch point
2025-07-01 05:44:25.531 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:25.537
2025-07-01 05:44:25.543 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:25.556 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:25.567
2025-07-01 05:44:25.579 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:25.590 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:25.596 alo = 22, ahi = 1101
2025-07-01 05:44:25.603 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:25.611 blo = 22, bhi = 1101
2025-07-01 05:44:25.619
2025-07-01 05:44:25.626 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:25.633 g = []
2025-07-01 05:44:25.639 if alo < ahi:
2025-07-01 05:44:25.646 if blo < bhi:
2025-07-01 05:44:25.656 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:25.664 else:
2025-07-01 05:44:25.670 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:25.681 elif blo < bhi:
2025-07-01 05:44:25.691 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:25.698
2025-07-01 05:44:25.705 > yield from g
2025-07-01 05:44:25.711
2025-07-01 05:44:25.717 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:25.723 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:25.730
2025-07-01 05:44:25.739 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:25.749 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:25.758 alo = 22, ahi = 1101
2025-07-01 05:44:25.766 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:25.773 blo = 22, bhi = 1101
2025-07-01 05:44:25.779
2025-07-01 05:44:25.785 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:25.790 r"""
2025-07-01 05:44:25.794 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:25.800 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:25.805 synch point, and intraline difference marking is done on the
2025-07-01 05:44:25.816 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:25.825
2025-07-01 05:44:25.833 Example:
2025-07-01 05:44:25.840
2025-07-01 05:44:25.847 >>> d = Differ()
2025-07-01 05:44:25.855 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:25.868 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:25.877 >>> print(''.join(results), end="")
2025-07-01 05:44:25.884 - abcDefghiJkl
2025-07-01 05:44:25.897 + abcdefGhijkl
2025-07-01 05:44:25.910 """
2025-07-01 05:44:25.916
2025-07-01 05:44:25.922 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:25.928 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:25.934 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:25.940 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:25.947 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:25.958
2025-07-01 05:44:25.964 # search for the pair that matches best without being identical
2025-07-01 05:44:25.970 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:25.977 # on junk -- unless we have to)
2025-07-01 05:44:25.982 for j in range(blo, bhi):
2025-07-01 05:44:25.993 bj = b[j]
2025-07-01 05:44:26.006 cruncher.set_seq2(bj)
2025-07-01 05:44:26.014 for i in range(alo, ahi):
2025-07-01 05:44:26.025 ai = a[i]
2025-07-01 05:44:26.034 if ai == bj:
2025-07-01 05:44:26.046 if eqi is None:
2025-07-01 05:44:26.058 eqi, eqj = i, j
2025-07-01 05:44:26.066 continue
2025-07-01 05:44:26.077 cruncher.set_seq1(ai)
2025-07-01 05:44:26.086 # computing similarity is expensive, so use the quick
2025-07-01 05:44:26.094 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:26.104 # compares by a factor of 3.
2025-07-01 05:44:26.113 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:26.120 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:26.127 # of the computation is cached by cruncher
2025-07-01 05:44:26.139 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:26.149 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:26.156 cruncher.ratio() > best_ratio:
2025-07-01 05:44:26.162 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:26.169 if best_ratio < cutoff:
2025-07-01 05:44:26.177 # no non-identical "pretty close" pair
2025-07-01 05:44:26.186 if eqi is None:
2025-07-01 05:44:26.193 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:26.201 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:26.207 return
2025-07-01 05:44:26.213 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:26.219 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:26.227 else:
2025-07-01 05:44:26.238 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:26.247 eqi = None
2025-07-01 05:44:26.255
2025-07-01 05:44:26.263 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:26.269 # identical
2025-07-01 05:44:26.275
2025-07-01 05:44:26.283 # pump out diffs from before the synch point
2025-07-01 05:44:26.292 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:26.303
2025-07-01 05:44:26.311 # do intraline marking on the synch pair
2025-07-01 05:44:26.319 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:26.331 if eqi is None:
2025-07-01 05:44:26.341 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:26.348 atags = btags = ""
2025-07-01 05:44:26.354 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:26.363 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:26.373 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:26.381 if tag == 'replace':
2025-07-01 05:44:26.388 atags += '^' * la
2025-07-01 05:44:26.401 btags += '^' * lb
2025-07-01 05:44:26.411 elif tag == 'delete':
2025-07-01 05:44:26.423 atags += '-' * la
2025-07-01 05:44:26.432 elif tag == 'insert':
2025-07-01 05:44:26.444 btags += '+' * lb
2025-07-01 05:44:26.455 elif tag == 'equal':
2025-07-01 05:44:26.466 atags += ' ' * la
2025-07-01 05:44:26.475 btags += ' ' * lb
2025-07-01 05:44:26.487 else:
2025-07-01 05:44:26.498 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:26.511 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:26.518 else:
2025-07-01 05:44:26.524 # the synch pair is identical
2025-07-01 05:44:26.531 yield ' ' + aelt
2025-07-01 05:44:26.537
2025-07-01 05:44:26.543 # pump out diffs from after the synch point
2025-07-01 05:44:26.549 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:26.555
2025-07-01 05:44:26.561 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:26.567 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:26.573
2025-07-01 05:44:26.579 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:26.585 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:26.592 alo = 23, ahi = 1101
2025-07-01 05:44:26.599 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:26.606 blo = 23, bhi = 1101
2025-07-01 05:44:26.612
2025-07-01 05:44:26.618 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:26.625 g = []
2025-07-01 05:44:26.631 if alo < ahi:
2025-07-01 05:44:26.637 if blo < bhi:
2025-07-01 05:44:26.643 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:26.650 else:
2025-07-01 05:44:26.662 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:26.672 elif blo < bhi:
2025-07-01 05:44:26.680 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:26.687
2025-07-01 05:44:26.693 > yield from g
2025-07-01 05:44:26.698
2025-07-01 05:44:26.703 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:26.708 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:26.712
2025-07-01 05:44:26.717 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:26.723 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:26.727 alo = 23, ahi = 1101
2025-07-01 05:44:26.733 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:26.737 blo = 23, bhi = 1101
2025-07-01 05:44:26.742
2025-07-01 05:44:26.746 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:26.751 r"""
2025-07-01 05:44:26.756 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:26.761 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:26.766 synch point, and intraline difference marking is done on the
2025-07-01 05:44:26.772 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:26.778
2025-07-01 05:44:26.785 Example:
2025-07-01 05:44:26.791
2025-07-01 05:44:26.797 >>> d = Differ()
2025-07-01 05:44:26.802 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:26.808 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:26.814 >>> print(''.join(results), end="")
2025-07-01 05:44:26.820 - abcDefghiJkl
2025-07-01 05:44:26.832 + abcdefGhijkl
2025-07-01 05:44:26.844 """
2025-07-01 05:44:26.850
2025-07-01 05:44:26.856 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:26.863 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:26.871 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:26.878 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:26.888 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:26.894
2025-07-01 05:44:26.900 # search for the pair that matches best without being identical
2025-07-01 05:44:26.905 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:26.916 # on junk -- unless we have to)
2025-07-01 05:44:26.925 for j in range(blo, bhi):
2025-07-01 05:44:26.931 bj = b[j]
2025-07-01 05:44:26.937 cruncher.set_seq2(bj)
2025-07-01 05:44:26.942 for i in range(alo, ahi):
2025-07-01 05:44:26.947 ai = a[i]
2025-07-01 05:44:26.952 if ai == bj:
2025-07-01 05:44:26.957 if eqi is None:
2025-07-01 05:44:26.963 eqi, eqj = i, j
2025-07-01 05:44:26.969 continue
2025-07-01 05:44:26.975 cruncher.set_seq1(ai)
2025-07-01 05:44:26.983 # computing similarity is expensive, so use the quick
2025-07-01 05:44:26.991 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:26.998 # compares by a factor of 3.
2025-07-01 05:44:27.004 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:27.011 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:27.018 # of the computation is cached by cruncher
2025-07-01 05:44:27.027 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:27.039 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:27.047 cruncher.ratio() > best_ratio:
2025-07-01 05:44:27.054 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:27.060 if best_ratio < cutoff:
2025-07-01 05:44:27.067 # no non-identical "pretty close" pair
2025-07-01 05:44:27.075 if eqi is None:
2025-07-01 05:44:27.086 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:27.094 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:27.101 return
2025-07-01 05:44:27.106 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:27.112 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:27.117 else:
2025-07-01 05:44:27.122 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:27.128 eqi = None
2025-07-01 05:44:27.136
2025-07-01 05:44:27.152 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:27.163 # identical
2025-07-01 05:44:27.171
2025-07-01 05:44:27.177 # pump out diffs from before the synch point
2025-07-01 05:44:27.182 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:27.188
2025-07-01 05:44:27.193 # do intraline marking on the synch pair
2025-07-01 05:44:27.199 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:27.204 if eqi is None:
2025-07-01 05:44:27.210 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:27.218 atags = btags = ""
2025-07-01 05:44:27.225 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:27.233 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:27.242 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:27.254 if tag == 'replace':
2025-07-01 05:44:27.264 atags += '^' * la
2025-07-01 05:44:27.273 btags += '^' * lb
2025-07-01 05:44:27.281 elif tag == 'delete':
2025-07-01 05:44:27.294 atags += '-' * la
2025-07-01 05:44:27.308 elif tag == 'insert':
2025-07-01 05:44:27.317 btags += '+' * lb
2025-07-01 05:44:27.324 elif tag == 'equal':
2025-07-01 05:44:27.330 atags += ' ' * la
2025-07-01 05:44:27.336 btags += ' ' * lb
2025-07-01 05:44:27.342 else:
2025-07-01 05:44:27.349 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:27.355 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:27.360 else:
2025-07-01 05:44:27.366 # the synch pair is identical
2025-07-01 05:44:27.373 yield ' ' + aelt
2025-07-01 05:44:27.378
2025-07-01 05:44:27.384 # pump out diffs from after the synch point
2025-07-01 05:44:27.389 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:27.400
2025-07-01 05:44:27.410 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:27.418 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:27.429
2025-07-01 05:44:27.443 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:27.456 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:27.465 alo = 26, ahi = 1101
2025-07-01 05:44:27.476 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:27.488 blo = 26, bhi = 1101
2025-07-01 05:44:27.496
2025-07-01 05:44:27.507 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:27.518 g = []
2025-07-01 05:44:27.529 if alo < ahi:
2025-07-01 05:44:27.536 if blo < bhi:
2025-07-01 05:44:27.542 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:27.546 else:
2025-07-01 05:44:27.551 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:27.556 elif blo < bhi:
2025-07-01 05:44:27.560 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:27.565
2025-07-01 05:44:27.571 > yield from g
2025-07-01 05:44:27.577
2025-07-01 05:44:27.583 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:27.591 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:27.598
2025-07-01 05:44:27.608 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:27.618 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:27.625 alo = 26, ahi = 1101
2025-07-01 05:44:27.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:27.637 blo = 26, bhi = 1101
2025-07-01 05:44:27.643
2025-07-01 05:44:27.648 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:27.654 r"""
2025-07-01 05:44:27.665 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:27.677 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:27.688 synch point, and intraline difference marking is done on the
2025-07-01 05:44:27.700 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:27.712
2025-07-01 05:44:27.720 Example:
2025-07-01 05:44:27.727
2025-07-01 05:44:27.733 >>> d = Differ()
2025-07-01 05:44:27.746 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:27.757 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:27.768 >>> print(''.join(results), end="")
2025-07-01 05:44:27.779 - abcDefghiJkl
2025-07-01 05:44:27.798 + abcdefGhijkl
2025-07-01 05:44:27.809 """
2025-07-01 05:44:27.814
2025-07-01 05:44:27.820 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:27.826 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:27.832 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:27.838 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:27.850 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:27.861
2025-07-01 05:44:27.873 # search for the pair that matches best without being identical
2025-07-01 05:44:27.883 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:27.890 # on junk -- unless we have to)
2025-07-01 05:44:27.900 for j in range(blo, bhi):
2025-07-01 05:44:27.910 bj = b[j]
2025-07-01 05:44:27.917 cruncher.set_seq2(bj)
2025-07-01 05:44:27.924 for i in range(alo, ahi):
2025-07-01 05:44:27.931 ai = a[i]
2025-07-01 05:44:27.936 if ai == bj:
2025-07-01 05:44:27.941 if eqi is None:
2025-07-01 05:44:27.946 eqi, eqj = i, j
2025-07-01 05:44:27.950 continue
2025-07-01 05:44:27.955 cruncher.set_seq1(ai)
2025-07-01 05:44:27.959 # computing similarity is expensive, so use the quick
2025-07-01 05:44:27.964 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:27.972 # compares by a factor of 3.
2025-07-01 05:44:27.982 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:27.988 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:27.995 # of the computation is cached by cruncher
2025-07-01 05:44:28.003 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:28.013 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:28.022 cruncher.ratio() > best_ratio:
2025-07-01 05:44:28.028 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:28.034 if best_ratio < cutoff:
2025-07-01 05:44:28.040 # no non-identical "pretty close" pair
2025-07-01 05:44:28.047 if eqi is None:
2025-07-01 05:44:28.060 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:28.069 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:28.075 return
2025-07-01 05:44:28.081 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:28.087 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:28.091 else:
2025-07-01 05:44:28.096 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:28.103 eqi = None
2025-07-01 05:44:28.109
2025-07-01 05:44:28.114 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:28.119 # identical
2025-07-01 05:44:28.124
2025-07-01 05:44:28.128 # pump out diffs from before the synch point
2025-07-01 05:44:28.133 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:28.137
2025-07-01 05:44:28.141 # do intraline marking on the synch pair
2025-07-01 05:44:28.145 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:28.150 if eqi is None:
2025-07-01 05:44:28.154 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:28.158 atags = btags = ""
2025-07-01 05:44:28.163 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:28.167 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:28.171 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:28.176 if tag == 'replace':
2025-07-01 05:44:28.180 atags += '^' * la
2025-07-01 05:44:28.186 btags += '^' * lb
2025-07-01 05:44:28.191 elif tag == 'delete':
2025-07-01 05:44:28.196 atags += '-' * la
2025-07-01 05:44:28.201 elif tag == 'insert':
2025-07-01 05:44:28.206 btags += '+' * lb
2025-07-01 05:44:28.210 elif tag == 'equal':
2025-07-01 05:44:28.214 atags += ' ' * la
2025-07-01 05:44:28.219 btags += ' ' * lb
2025-07-01 05:44:28.223 else:
2025-07-01 05:44:28.228 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:28.232 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:28.236 else:
2025-07-01 05:44:28.241 # the synch pair is identical
2025-07-01 05:44:28.245 yield ' ' + aelt
2025-07-01 05:44:28.249
2025-07-01 05:44:28.254 # pump out diffs from after the synch point
2025-07-01 05:44:28.258 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:28.263
2025-07-01 05:44:28.267 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:28.271 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:28.276
2025-07-01 05:44:28.280 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:28.285 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:28.289 alo = 27, ahi = 1101
2025-07-01 05:44:28.294 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:28.298 blo = 27, bhi = 1101
2025-07-01 05:44:28.302
2025-07-01 05:44:28.307 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:28.311 g = []
2025-07-01 05:44:28.316 if alo < ahi:
2025-07-01 05:44:28.322 if blo < bhi:
2025-07-01 05:44:28.332 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:28.339 else:
2025-07-01 05:44:28.350 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:28.359 elif blo < bhi:
2025-07-01 05:44:28.369 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:28.380
2025-07-01 05:44:28.393 > yield from g
2025-07-01 05:44:28.406
2025-07-01 05:44:28.415 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:28.424 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:28.434
2025-07-01 05:44:28.445 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:28.455 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:28.464 alo = 27, ahi = 1101
2025-07-01 05:44:28.472 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:28.480 blo = 27, bhi = 1101
2025-07-01 05:44:28.487
2025-07-01 05:44:28.498 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:28.505 r"""
2025-07-01 05:44:28.512 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:28.517 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:28.522 synch point, and intraline difference marking is done on the
2025-07-01 05:44:28.526 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:28.533
2025-07-01 05:44:28.540 Example:
2025-07-01 05:44:28.547
2025-07-01 05:44:28.554 >>> d = Differ()
2025-07-01 05:44:28.567 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:28.576 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:28.590 >>> print(''.join(results), end="")
2025-07-01 05:44:28.600 - abcDefghiJkl
2025-07-01 05:44:28.623 + abcdefGhijkl
2025-07-01 05:44:28.640 """
2025-07-01 05:44:28.649
2025-07-01 05:44:28.661 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:28.672 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:28.681 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:28.694 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:28.705 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:28.712
2025-07-01 05:44:28.717 # search for the pair that matches best without being identical
2025-07-01 05:44:28.722 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:28.726 # on junk -- unless we have to)
2025-07-01 05:44:28.731 for j in range(blo, bhi):
2025-07-01 05:44:28.735 bj = b[j]
2025-07-01 05:44:28.739 cruncher.set_seq2(bj)
2025-07-01 05:44:28.743 for i in range(alo, ahi):
2025-07-01 05:44:28.748 ai = a[i]
2025-07-01 05:44:28.752 if ai == bj:
2025-07-01 05:44:28.756 if eqi is None:
2025-07-01 05:44:28.761 eqi, eqj = i, j
2025-07-01 05:44:28.765 continue
2025-07-01 05:44:28.769 cruncher.set_seq1(ai)
2025-07-01 05:44:28.774 # computing similarity is expensive, so use the quick
2025-07-01 05:44:28.778 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:28.789 # compares by a factor of 3.
2025-07-01 05:44:28.797 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:28.807 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:28.814 # of the computation is cached by cruncher
2025-07-01 05:44:28.822 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:28.830 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:28.842 cruncher.ratio() > best_ratio:
2025-07-01 05:44:28.852 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:28.863 if best_ratio < cutoff:
2025-07-01 05:44:28.871 # no non-identical "pretty close" pair
2025-07-01 05:44:28.877 if eqi is None:
2025-07-01 05:44:28.882 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:28.887 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:28.891 return
2025-07-01 05:44:28.899 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:28.910 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:28.918 else:
2025-07-01 05:44:28.926 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:28.936 eqi = None
2025-07-01 05:44:28.945
2025-07-01 05:44:28.956 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:28.967 # identical
2025-07-01 05:44:28.977
2025-07-01 05:44:28.989 # pump out diffs from before the synch point
2025-07-01 05:44:29.000 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:29.012
2025-07-01 05:44:29.021 # do intraline marking on the synch pair
2025-07-01 05:44:29.029 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:29.036 if eqi is None:
2025-07-01 05:44:29.042 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:29.049 atags = btags = ""
2025-07-01 05:44:29.055 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:29.063 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:29.075 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:29.083 if tag == 'replace':
2025-07-01 05:44:29.089 atags += '^' * la
2025-07-01 05:44:29.099 btags += '^' * lb
2025-07-01 05:44:29.108 elif tag == 'delete':
2025-07-01 05:44:29.116 atags += '-' * la
2025-07-01 05:44:29.123 elif tag == 'insert':
2025-07-01 05:44:29.128 btags += '+' * lb
2025-07-01 05:44:29.134 elif tag == 'equal':
2025-07-01 05:44:29.144 atags += ' ' * la
2025-07-01 05:44:29.153 btags += ' ' * lb
2025-07-01 05:44:29.160 else:
2025-07-01 05:44:29.166 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:29.178 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:29.189 else:
2025-07-01 05:44:29.203 # the synch pair is identical
2025-07-01 05:44:29.212 yield ' ' + aelt
2025-07-01 05:44:29.219
2025-07-01 05:44:29.226 # pump out diffs from after the synch point
2025-07-01 05:44:29.235 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:29.246
2025-07-01 05:44:29.257 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:29.265 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:29.271
2025-07-01 05:44:29.277 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:29.282 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:29.294 alo = 28, ahi = 1101
2025-07-01 05:44:29.304 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:29.310 blo = 28, bhi = 1101
2025-07-01 05:44:29.316
2025-07-01 05:44:29.321 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:29.326 g = []
2025-07-01 05:44:29.331 if alo < ahi:
2025-07-01 05:44:29.336 if blo < bhi:
2025-07-01 05:44:29.343 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:29.349 else:
2025-07-01 05:44:29.356 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:29.366 elif blo < bhi:
2025-07-01 05:44:29.377 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:29.387
2025-07-01 05:44:29.399 > yield from g
2025-07-01 05:44:29.408
2025-07-01 05:44:29.415 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:29.422 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:29.427
2025-07-01 05:44:29.432 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:29.438 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:29.442 alo = 28, ahi = 1101
2025-07-01 05:44:29.447 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:29.452 blo = 28, bhi = 1101
2025-07-01 05:44:29.456
2025-07-01 05:44:29.462 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:29.468 r"""
2025-07-01 05:44:29.473 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:29.478 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:29.484 synch point, and intraline difference marking is done on the
2025-07-01 05:44:29.490 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:29.496
2025-07-01 05:44:29.502 Example:
2025-07-01 05:44:29.511
2025-07-01 05:44:29.522 >>> d = Differ()
2025-07-01 05:44:29.529 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:29.536 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:29.543 >>> print(''.join(results), end="")
2025-07-01 05:44:29.553 - abcDefghiJkl
2025-07-01 05:44:29.574 + abcdefGhijkl
2025-07-01 05:44:29.598 """
2025-07-01 05:44:29.610
2025-07-01 05:44:29.622 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:29.630 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:29.637 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:29.651 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:29.661 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:29.667
2025-07-01 05:44:29.673 # search for the pair that matches best without being identical
2025-07-01 05:44:29.679 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:29.685 # on junk -- unless we have to)
2025-07-01 05:44:29.690 for j in range(blo, bhi):
2025-07-01 05:44:29.700 bj = b[j]
2025-07-01 05:44:29.709 cruncher.set_seq2(bj)
2025-07-01 05:44:29.715 for i in range(alo, ahi):
2025-07-01 05:44:29.721 ai = a[i]
2025-07-01 05:44:29.727 if ai == bj:
2025-07-01 05:44:29.733 if eqi is None:
2025-07-01 05:44:29.739 eqi, eqj = i, j
2025-07-01 05:44:29.744 continue
2025-07-01 05:44:29.750 cruncher.set_seq1(ai)
2025-07-01 05:44:29.760 # computing similarity is expensive, so use the quick
2025-07-01 05:44:29.769 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:29.776 # compares by a factor of 3.
2025-07-01 05:44:29.783 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:29.790 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:29.799 # of the computation is cached by cruncher
2025-07-01 05:44:29.809 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:29.817 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:29.824 cruncher.ratio() > best_ratio:
2025-07-01 05:44:29.830 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:29.840 if best_ratio < cutoff:
2025-07-01 05:44:29.849 # no non-identical "pretty close" pair
2025-07-01 05:44:29.857 if eqi is None:
2025-07-01 05:44:29.864 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:29.870 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:29.876 return
2025-07-01 05:44:29.882 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:29.888 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:29.894 else:
2025-07-01 05:44:29.901 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:29.907 eqi = None
2025-07-01 05:44:29.915
2025-07-01 05:44:29.927 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:29.935 # identical
2025-07-01 05:44:29.943
2025-07-01 05:44:29.957 # pump out diffs from before the synch point
2025-07-01 05:44:29.967 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:29.975
2025-07-01 05:44:29.983 # do intraline marking on the synch pair
2025-07-01 05:44:29.995 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:30.004 if eqi is None:
2025-07-01 05:44:30.012 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:30.019 atags = btags = ""
2025-07-01 05:44:30.028 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:30.039 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:30.048 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:30.055 if tag == 'replace':
2025-07-01 05:44:30.062 atags += '^' * la
2025-07-01 05:44:30.068 btags += '^' * lb
2025-07-01 05:44:30.074 elif tag == 'delete':
2025-07-01 05:44:30.079 atags += '-' * la
2025-07-01 05:44:30.084 elif tag == 'insert':
2025-07-01 05:44:30.090 btags += '+' * lb
2025-07-01 05:44:30.096 elif tag == 'equal':
2025-07-01 05:44:30.102 atags += ' ' * la
2025-07-01 05:44:30.107 btags += ' ' * lb
2025-07-01 05:44:30.112 else:
2025-07-01 05:44:30.118 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:30.123 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:30.128 else:
2025-07-01 05:44:30.135 # the synch pair is identical
2025-07-01 05:44:30.146 yield ' ' + aelt
2025-07-01 05:44:30.155
2025-07-01 05:44:30.167 # pump out diffs from after the synch point
2025-07-01 05:44:30.179 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:30.188
2025-07-01 05:44:30.197 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:30.203 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:30.210
2025-07-01 05:44:30.215 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:30.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:30.235 alo = 29, ahi = 1101
2025-07-01 05:44:30.245 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:30.252 blo = 29, bhi = 1101
2025-07-01 05:44:30.259
2025-07-01 05:44:30.267 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:30.277 g = []
2025-07-01 05:44:30.286 if alo < ahi:
2025-07-01 05:44:30.293 if blo < bhi:
2025-07-01 05:44:30.299 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:30.307 else:
2025-07-01 05:44:30.318 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:30.328 elif blo < bhi:
2025-07-01 05:44:30.339 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:30.350
2025-07-01 05:44:30.360 > yield from g
2025-07-01 05:44:30.373
2025-07-01 05:44:30.384 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:30.396 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:30.409
2025-07-01 05:44:30.420 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:30.433 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:30.445 alo = 29, ahi = 1101
2025-07-01 05:44:30.458 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:30.468 blo = 29, bhi = 1101
2025-07-01 05:44:30.476
2025-07-01 05:44:30.484 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:30.490 r"""
2025-07-01 05:44:30.499 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:30.510 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:30.519 synch point, and intraline difference marking is done on the
2025-07-01 05:44:30.530 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:30.540
2025-07-01 05:44:30.553 Example:
2025-07-01 05:44:30.563
2025-07-01 05:44:30.573 >>> d = Differ()
2025-07-01 05:44:30.581 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:30.587 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:30.593 >>> print(''.join(results), end="")
2025-07-01 05:44:30.598 - abcDefghiJkl
2025-07-01 05:44:30.611 + abcdefGhijkl
2025-07-01 05:44:30.629 """
2025-07-01 05:44:30.637
2025-07-01 05:44:30.644 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:30.650 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:30.660 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:30.670 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:30.678 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:30.684
2025-07-01 05:44:30.692 # search for the pair that matches best without being identical
2025-07-01 05:44:30.699 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:30.708 # on junk -- unless we have to)
2025-07-01 05:44:30.716 for j in range(blo, bhi):
2025-07-01 05:44:30.723 bj = b[j]
2025-07-01 05:44:30.731 cruncher.set_seq2(bj)
2025-07-01 05:44:30.742 for i in range(alo, ahi):
2025-07-01 05:44:30.750 ai = a[i]
2025-07-01 05:44:30.757 if ai == bj:
2025-07-01 05:44:30.768 if eqi is None:
2025-07-01 05:44:30.778 eqi, eqj = i, j
2025-07-01 05:44:30.787 continue
2025-07-01 05:44:30.797 cruncher.set_seq1(ai)
2025-07-01 05:44:30.806 # computing similarity is expensive, so use the quick
2025-07-01 05:44:30.815 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:30.826 # compares by a factor of 3.
2025-07-01 05:44:30.834 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:30.841 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:30.851 # of the computation is cached by cruncher
2025-07-01 05:44:30.860 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:30.872 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:30.883 cruncher.ratio() > best_ratio:
2025-07-01 05:44:30.894 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:30.904 if best_ratio < cutoff:
2025-07-01 05:44:30.913 # no non-identical "pretty close" pair
2025-07-01 05:44:30.919 if eqi is None:
2025-07-01 05:44:30.925 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:30.931 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:30.937 return
2025-07-01 05:44:30.942 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:30.953 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:30.960 else:
2025-07-01 05:44:30.967 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:30.981 eqi = None
2025-07-01 05:44:30.991
2025-07-01 05:44:31.003 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:31.012 # identical
2025-07-01 05:44:31.018
2025-07-01 05:44:31.024 # pump out diffs from before the synch point
2025-07-01 05:44:31.030 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:31.036
2025-07-01 05:44:31.043 # do intraline marking on the synch pair
2025-07-01 05:44:31.054 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:31.062 if eqi is None:
2025-07-01 05:44:31.069 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:31.075 atags = btags = ""
2025-07-01 05:44:31.082 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:31.098 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:31.108 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:31.115 if tag == 'replace':
2025-07-01 05:44:31.122 atags += '^' * la
2025-07-01 05:44:31.129 btags += '^' * lb
2025-07-01 05:44:31.136 elif tag == 'delete':
2025-07-01 05:44:31.148 atags += '-' * la
2025-07-01 05:44:31.158 elif tag == 'insert':
2025-07-01 05:44:31.165 btags += '+' * lb
2025-07-01 05:44:31.171 elif tag == 'equal':
2025-07-01 05:44:31.177 atags += ' ' * la
2025-07-01 05:44:31.185 btags += ' ' * lb
2025-07-01 05:44:31.191 else:
2025-07-01 05:44:31.197 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:31.207 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:31.214 else:
2025-07-01 05:44:31.220 # the synch pair is identical
2025-07-01 05:44:31.225 yield ' ' + aelt
2025-07-01 05:44:31.233
2025-07-01 05:44:31.240 # pump out diffs from after the synch point
2025-07-01 05:44:31.245 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:31.251
2025-07-01 05:44:31.255 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:31.260 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:31.267
2025-07-01 05:44:31.273 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:31.283 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:31.289 alo = 30, ahi = 1101
2025-07-01 05:44:31.299 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:31.305 blo = 30, bhi = 1101
2025-07-01 05:44:31.315
2025-07-01 05:44:31.323 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:31.330 g = []
2025-07-01 05:44:31.336 if alo < ahi:
2025-07-01 05:44:31.343 if blo < bhi:
2025-07-01 05:44:31.349 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:31.355 else:
2025-07-01 05:44:31.361 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:31.369 elif blo < bhi:
2025-07-01 05:44:31.374 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:31.379
2025-07-01 05:44:31.386 > yield from g
2025-07-01 05:44:31.392
2025-07-01 05:44:31.403 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:31.412 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:31.419
2025-07-01 05:44:31.424 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:31.430 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:31.437 alo = 30, ahi = 1101
2025-07-01 05:44:31.442 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:31.448 blo = 30, bhi = 1101
2025-07-01 05:44:31.454
2025-07-01 05:44:31.460 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:31.466 r"""
2025-07-01 05:44:31.474 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:31.484 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:31.496 synch point, and intraline difference marking is done on the
2025-07-01 05:44:31.506 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:31.513
2025-07-01 05:44:31.519 Example:
2025-07-01 05:44:31.526
2025-07-01 05:44:31.534 >>> d = Differ()
2025-07-01 05:44:31.541 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:31.548 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:31.554 >>> print(''.join(results), end="")
2025-07-01 05:44:31.561 - abcDefghiJkl
2025-07-01 05:44:31.573 + abcdefGhijkl
2025-07-01 05:44:31.587 """
2025-07-01 05:44:31.595
2025-07-01 05:44:31.602 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:31.610 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:31.622 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:31.632 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:31.639 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:31.647
2025-07-01 05:44:31.654 # search for the pair that matches best without being identical
2025-07-01 05:44:31.662 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:31.671 # on junk -- unless we have to)
2025-07-01 05:44:31.681 for j in range(blo, bhi):
2025-07-01 05:44:31.691 bj = b[j]
2025-07-01 05:44:31.702 cruncher.set_seq2(bj)
2025-07-01 05:44:31.711 for i in range(alo, ahi):
2025-07-01 05:44:31.720 ai = a[i]
2025-07-01 05:44:31.731 if ai == bj:
2025-07-01 05:44:31.739 if eqi is None:
2025-07-01 05:44:31.744 eqi, eqj = i, j
2025-07-01 05:44:31.756 continue
2025-07-01 05:44:31.768 cruncher.set_seq1(ai)
2025-07-01 05:44:31.777 # computing similarity is expensive, so use the quick
2025-07-01 05:44:31.785 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:31.792 # compares by a factor of 3.
2025-07-01 05:44:31.799 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:31.807 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:31.815 # of the computation is cached by cruncher
2025-07-01 05:44:31.828 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:31.837 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:31.845 cruncher.ratio() > best_ratio:
2025-07-01 05:44:31.850 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:31.856 if best_ratio < cutoff:
2025-07-01 05:44:31.860 # no non-identical "pretty close" pair
2025-07-01 05:44:31.865 if eqi is None:
2025-07-01 05:44:31.871 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:31.882 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:31.891 return
2025-07-01 05:44:31.900 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:31.909 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:31.916 else:
2025-07-01 05:44:31.923 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:31.930 eqi = None
2025-07-01 05:44:31.941
2025-07-01 05:44:31.952 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:31.958 # identical
2025-07-01 05:44:31.964
2025-07-01 05:44:31.975 # pump out diffs from before the synch point
2025-07-01 05:44:31.984 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:31.991
2025-07-01 05:44:31.998 # do intraline marking on the synch pair
2025-07-01 05:44:32.005 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:32.015 if eqi is None:
2025-07-01 05:44:32.022 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:32.029 atags = btags = ""
2025-07-01 05:44:32.039 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:32.045 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:32.053 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:32.060 if tag == 'replace':
2025-07-01 05:44:32.067 atags += '^' * la
2025-07-01 05:44:32.074 btags += '^' * lb
2025-07-01 05:44:32.080 elif tag == 'delete':
2025-07-01 05:44:32.086 atags += '-' * la
2025-07-01 05:44:32.092 elif tag == 'insert':
2025-07-01 05:44:32.099 btags += '+' * lb
2025-07-01 05:44:32.104 elif tag == 'equal':
2025-07-01 05:44:32.110 atags += ' ' * la
2025-07-01 05:44:32.117 btags += ' ' * lb
2025-07-01 05:44:32.123 else:
2025-07-01 05:44:32.133 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:32.142 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:32.151 else:
2025-07-01 05:44:32.165 # the synch pair is identical
2025-07-01 05:44:32.174 yield ' ' + aelt
2025-07-01 05:44:32.182
2025-07-01 05:44:32.188 # pump out diffs from after the synch point
2025-07-01 05:44:32.195 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:32.208
2025-07-01 05:44:32.216 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:32.223 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:32.234
2025-07-01 05:44:32.246 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:32.255 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:32.262 alo = 31, ahi = 1101
2025-07-01 05:44:32.271 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:32.278 blo = 31, bhi = 1101
2025-07-01 05:44:32.285
2025-07-01 05:44:32.292 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:32.299 g = []
2025-07-01 05:44:32.309 if alo < ahi:
2025-07-01 05:44:32.316 if blo < bhi:
2025-07-01 05:44:32.324 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:32.331 else:
2025-07-01 05:44:32.344 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:32.355 elif blo < bhi:
2025-07-01 05:44:32.366 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:32.375
2025-07-01 05:44:32.384 > yield from g
2025-07-01 05:44:32.391
2025-07-01 05:44:32.397 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:32.404 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:32.410
2025-07-01 05:44:32.415 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:32.424 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:32.432 alo = 31, ahi = 1101
2025-07-01 05:44:32.443 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:32.451 blo = 31, bhi = 1101
2025-07-01 05:44:32.461
2025-07-01 05:44:32.471 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:32.478 r"""
2025-07-01 05:44:32.486 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:32.497 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:32.507 synch point, and intraline difference marking is done on the
2025-07-01 05:44:32.514 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:32.521
2025-07-01 05:44:32.528 Example:
2025-07-01 05:44:32.535
2025-07-01 05:44:32.542 >>> d = Differ()
2025-07-01 05:44:32.550 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:32.559 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:32.571 >>> print(''.join(results), end="")
2025-07-01 05:44:32.582 - abcDefghiJkl
2025-07-01 05:44:32.596 + abcdefGhijkl
2025-07-01 05:44:32.609 """
2025-07-01 05:44:32.615
2025-07-01 05:44:32.622 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:32.631 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:32.642 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:32.650 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:32.657 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:32.665
2025-07-01 05:44:32.672 # search for the pair that matches best without being identical
2025-07-01 05:44:32.679 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:32.686 # on junk -- unless we have to)
2025-07-01 05:44:32.695 for j in range(blo, bhi):
2025-07-01 05:44:32.703 bj = b[j]
2025-07-01 05:44:32.711 cruncher.set_seq2(bj)
2025-07-01 05:44:32.718 for i in range(alo, ahi):
2025-07-01 05:44:32.730 ai = a[i]
2025-07-01 05:44:32.743 if ai == bj:
2025-07-01 05:44:32.754 if eqi is None:
2025-07-01 05:44:32.762 eqi, eqj = i, j
2025-07-01 05:44:32.769 continue
2025-07-01 05:44:32.776 cruncher.set_seq1(ai)
2025-07-01 05:44:32.787 # computing similarity is expensive, so use the quick
2025-07-01 05:44:32.796 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:32.803 # compares by a factor of 3.
2025-07-01 05:44:32.811 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:32.818 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:32.828 # of the computation is cached by cruncher
2025-07-01 05:44:32.835 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:32.843 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:32.851 cruncher.ratio() > best_ratio:
2025-07-01 05:44:32.865 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:32.875 if best_ratio < cutoff:
2025-07-01 05:44:32.882 # no non-identical "pretty close" pair
2025-07-01 05:44:32.889 if eqi is None:
2025-07-01 05:44:32.897 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:32.905 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:32.912 return
2025-07-01 05:44:32.923 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:32.933 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:32.945 else:
2025-07-01 05:44:32.954 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:32.961 eqi = None
2025-07-01 05:44:32.968
2025-07-01 05:44:32.974 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:32.981 # identical
2025-07-01 05:44:32.986
2025-07-01 05:44:32.991 # pump out diffs from before the synch point
2025-07-01 05:44:32.997 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:33.002
2025-07-01 05:44:33.008 # do intraline marking on the synch pair
2025-07-01 05:44:33.014 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:33.019 if eqi is None:
2025-07-01 05:44:33.026 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:33.035 atags = btags = ""
2025-07-01 05:44:33.043 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:33.050 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:33.056 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:33.062 if tag == 'replace':
2025-07-01 05:44:33.072 atags += '^' * la
2025-07-01 05:44:33.083 btags += '^' * lb
2025-07-01 05:44:33.090 elif tag == 'delete':
2025-07-01 05:44:33.097 atags += '-' * la
2025-07-01 05:44:33.103 elif tag == 'insert':
2025-07-01 05:44:33.111 btags += '+' * lb
2025-07-01 05:44:33.121 elif tag == 'equal':
2025-07-01 05:44:33.128 atags += ' ' * la
2025-07-01 05:44:33.134 btags += ' ' * lb
2025-07-01 05:44:33.141 else:
2025-07-01 05:44:33.147 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:33.159 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:33.171 else:
2025-07-01 05:44:33.180 # the synch pair is identical
2025-07-01 05:44:33.188 yield ' ' + aelt
2025-07-01 05:44:33.194
2025-07-01 05:44:33.206 # pump out diffs from after the synch point
2025-07-01 05:44:33.216 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:33.224
2025-07-01 05:44:33.231 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:33.238 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:33.243
2025-07-01 05:44:33.251 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:33.261 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:33.269 alo = 32, ahi = 1101
2025-07-01 05:44:33.276 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:33.281 blo = 32, bhi = 1101
2025-07-01 05:44:33.287
2025-07-01 05:44:33.293 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:33.299 g = []
2025-07-01 05:44:33.304 if alo < ahi:
2025-07-01 05:44:33.310 if blo < bhi:
2025-07-01 05:44:33.317 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:33.323 else:
2025-07-01 05:44:33.329 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:33.335 elif blo < bhi:
2025-07-01 05:44:33.343 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:33.354
2025-07-01 05:44:33.363 > yield from g
2025-07-01 05:44:33.370
2025-07-01 05:44:33.376 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:33.382 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:33.390
2025-07-01 05:44:33.396 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:33.402 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:33.406 alo = 32, ahi = 1101
2025-07-01 05:44:33.413 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:33.420 blo = 32, bhi = 1101
2025-07-01 05:44:33.427
2025-07-01 05:44:33.433 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:33.439 r"""
2025-07-01 05:44:33.446 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:33.453 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:33.460 synch point, and intraline difference marking is done on the
2025-07-01 05:44:33.467 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:33.472
2025-07-01 05:44:33.478 Example:
2025-07-01 05:44:33.483
2025-07-01 05:44:33.489 >>> d = Differ()
2025-07-01 05:44:33.495 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:33.507 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:33.516 >>> print(''.join(results), end="")
2025-07-01 05:44:33.523 - abcDefghiJkl
2025-07-01 05:44:33.544 + abcdefGhijkl
2025-07-01 05:44:33.565 """
2025-07-01 05:44:33.572
2025-07-01 05:44:33.580 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:33.587 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:33.594 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:33.605 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:33.615 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:33.628
2025-07-01 05:44:33.639 # search for the pair that matches best without being identical
2025-07-01 05:44:33.650 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:33.659 # on junk -- unless we have to)
2025-07-01 05:44:33.667 for j in range(blo, bhi):
2025-07-01 05:44:33.677 bj = b[j]
2025-07-01 05:44:33.690 cruncher.set_seq2(bj)
2025-07-01 05:44:33.701 for i in range(alo, ahi):
2025-07-01 05:44:33.709 ai = a[i]
2025-07-01 05:44:33.715 if ai == bj:
2025-07-01 05:44:33.724 if eqi is None:
2025-07-01 05:44:33.735 eqi, eqj = i, j
2025-07-01 05:44:33.744 continue
2025-07-01 05:44:33.756 cruncher.set_seq1(ai)
2025-07-01 05:44:33.766 # computing similarity is expensive, so use the quick
2025-07-01 05:44:33.778 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:33.787 # compares by a factor of 3.
2025-07-01 05:44:33.795 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:33.808 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:33.818 # of the computation is cached by cruncher
2025-07-01 05:44:33.826 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:33.834 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:33.843 cruncher.ratio() > best_ratio:
2025-07-01 05:44:33.854 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:33.866 if best_ratio < cutoff:
2025-07-01 05:44:33.878 # no non-identical "pretty close" pair
2025-07-01 05:44:33.889 if eqi is None:
2025-07-01 05:44:33.901 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:33.912 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:33.920 return
2025-07-01 05:44:33.928 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:33.935 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:33.942 else:
2025-07-01 05:44:33.951 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:33.959 eqi = None
2025-07-01 05:44:33.966
2025-07-01 05:44:33.972 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:33.977 # identical
2025-07-01 05:44:33.982
2025-07-01 05:44:33.988 # pump out diffs from before the synch point
2025-07-01 05:44:33.999 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:34.011
2025-07-01 05:44:34.020 # do intraline marking on the synch pair
2025-07-01 05:44:34.026 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:34.031 if eqi is None:
2025-07-01 05:44:34.036 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:34.046 atags = btags = ""
2025-07-01 05:44:34.052 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:34.059 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:34.067 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:34.077 if tag == 'replace':
2025-07-01 05:44:34.086 atags += '^' * la
2025-07-01 05:44:34.093 btags += '^' * lb
2025-07-01 05:44:34.098 elif tag == 'delete':
2025-07-01 05:44:34.106 atags += '-' * la
2025-07-01 05:44:34.116 elif tag == 'insert':
2025-07-01 05:44:34.125 btags += '+' * lb
2025-07-01 05:44:34.131 elif tag == 'equal':
2025-07-01 05:44:34.139 atags += ' ' * la
2025-07-01 05:44:34.150 btags += ' ' * lb
2025-07-01 05:44:34.158 else:
2025-07-01 05:44:34.166 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:34.178 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:34.188 else:
2025-07-01 05:44:34.196 # the synch pair is identical
2025-07-01 05:44:34.202 yield ' ' + aelt
2025-07-01 05:44:34.208
2025-07-01 05:44:34.214 # pump out diffs from after the synch point
2025-07-01 05:44:34.220 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:34.226
2025-07-01 05:44:34.233 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:34.245 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:34.255
2025-07-01 05:44:34.263 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:34.274 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:34.285 alo = 33, ahi = 1101
2025-07-01 05:44:34.294 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:34.302 blo = 33, bhi = 1101
2025-07-01 05:44:34.311
2025-07-01 05:44:34.322 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:34.334 g = []
2025-07-01 05:44:34.345 if alo < ahi:
2025-07-01 05:44:34.355 if blo < bhi:
2025-07-01 05:44:34.364 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:34.371 else:
2025-07-01 05:44:34.379 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:34.390 elif blo < bhi:
2025-07-01 05:44:34.401 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:34.411
2025-07-01 05:44:34.420 > yield from g
2025-07-01 05:44:34.427
2025-07-01 05:44:34.435 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:34.448 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:34.458
2025-07-01 05:44:34.465 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:34.478 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:34.486 alo = 33, ahi = 1101
2025-07-01 05:44:34.494 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:34.506 blo = 33, bhi = 1101
2025-07-01 05:44:34.515
2025-07-01 05:44:34.523 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:34.529 r"""
2025-07-01 05:44:34.536 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:34.545 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:34.554 synch point, and intraline difference marking is done on the
2025-07-01 05:44:34.560 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:34.566
2025-07-01 05:44:34.574 Example:
2025-07-01 05:44:34.584
2025-07-01 05:44:34.594 >>> d = Differ()
2025-07-01 05:44:34.601 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:34.607 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:34.614 >>> print(''.join(results), end="")
2025-07-01 05:44:34.620 - abcDefghiJkl
2025-07-01 05:44:34.634 + abcdefGhijkl
2025-07-01 05:44:34.653 """
2025-07-01 05:44:34.661
2025-07-01 05:44:34.667 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:34.673 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:34.680 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:34.687 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:34.693 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:34.699
2025-07-01 05:44:34.705 # search for the pair that matches best without being identical
2025-07-01 05:44:34.711 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:34.718 # on junk -- unless we have to)
2025-07-01 05:44:34.724 for j in range(blo, bhi):
2025-07-01 05:44:34.733 bj = b[j]
2025-07-01 05:44:34.742 cruncher.set_seq2(bj)
2025-07-01 05:44:34.749 for i in range(alo, ahi):
2025-07-01 05:44:34.756 ai = a[i]
2025-07-01 05:44:34.762 if ai == bj:
2025-07-01 05:44:34.773 if eqi is None:
2025-07-01 05:44:34.780 eqi, eqj = i, j
2025-07-01 05:44:34.790 continue
2025-07-01 05:44:34.801 cruncher.set_seq1(ai)
2025-07-01 05:44:34.809 # computing similarity is expensive, so use the quick
2025-07-01 05:44:34.816 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:34.824 # compares by a factor of 3.
2025-07-01 05:44:34.831 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:34.839 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:34.847 # of the computation is cached by cruncher
2025-07-01 05:44:34.854 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:34.862 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:34.870 cruncher.ratio() > best_ratio:
2025-07-01 05:44:34.882 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:34.889 if best_ratio < cutoff:
2025-07-01 05:44:34.894 # no non-identical "pretty close" pair
2025-07-01 05:44:34.899 if eqi is None:
2025-07-01 05:44:34.904 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:34.912 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:34.918 return
2025-07-01 05:44:34.924 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:34.931 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:34.939 else:
2025-07-01 05:44:34.947 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:34.954 eqi = None
2025-07-01 05:44:34.965
2025-07-01 05:44:34.974 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:34.981 # identical
2025-07-01 05:44:34.987
2025-07-01 05:44:34.992 # pump out diffs from before the synch point
2025-07-01 05:44:34.998 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:35.004
2025-07-01 05:44:35.010 # do intraline marking on the synch pair
2025-07-01 05:44:35.018 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:35.026 if eqi is None:
2025-07-01 05:44:35.034 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:35.041 atags = btags = ""
2025-07-01 05:44:35.048 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:35.055 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:35.062 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:35.068 if tag == 'replace':
2025-07-01 05:44:35.075 atags += '^' * la
2025-07-01 05:44:35.081 btags += '^' * lb
2025-07-01 05:44:35.091 elif tag == 'delete':
2025-07-01 05:44:35.099 atags += '-' * la
2025-07-01 05:44:35.105 elif tag == 'insert':
2025-07-01 05:44:35.111 btags += '+' * lb
2025-07-01 05:44:35.119 elif tag == 'equal':
2025-07-01 05:44:35.128 atags += ' ' * la
2025-07-01 05:44:35.135 btags += ' ' * lb
2025-07-01 05:44:35.143 else:
2025-07-01 05:44:35.155 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:35.164 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:35.171 else:
2025-07-01 05:44:35.177 # the synch pair is identical
2025-07-01 05:44:35.182 yield ' ' + aelt
2025-07-01 05:44:35.189
2025-07-01 05:44:35.197 # pump out diffs from after the synch point
2025-07-01 05:44:35.204 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:35.210
2025-07-01 05:44:35.217 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:35.225 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:35.232
2025-07-01 05:44:35.240 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:35.249 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:35.258 alo = 34, ahi = 1101
2025-07-01 05:44:35.268 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:35.275 blo = 34, bhi = 1101
2025-07-01 05:44:35.280
2025-07-01 05:44:35.286 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:35.294 g = []
2025-07-01 05:44:35.303 if alo < ahi:
2025-07-01 05:44:35.316 if blo < bhi:
2025-07-01 05:44:35.325 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:35.333 else:
2025-07-01 05:44:35.340 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:35.347 elif blo < bhi:
2025-07-01 05:44:35.354 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:35.368
2025-07-01 05:44:35.380 > yield from g
2025-07-01 05:44:35.389
2025-07-01 05:44:35.395 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:35.400 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:35.404
2025-07-01 05:44:35.409 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:35.416 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:35.421 alo = 34, ahi = 1101
2025-07-01 05:44:35.432 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:35.444 blo = 34, bhi = 1101
2025-07-01 05:44:35.454
2025-07-01 05:44:35.464 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:35.471 r"""
2025-07-01 05:44:35.477 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:35.488 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:35.500 synch point, and intraline difference marking is done on the
2025-07-01 05:44:35.513 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:35.522
2025-07-01 05:44:35.531 Example:
2025-07-01 05:44:35.543
2025-07-01 05:44:35.552 >>> d = Differ()
2025-07-01 05:44:35.560 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:35.567 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:35.575 >>> print(''.join(results), end="")
2025-07-01 05:44:35.581 - abcDefghiJkl
2025-07-01 05:44:35.592 + abcdefGhijkl
2025-07-01 05:44:35.605 """
2025-07-01 05:44:35.612
2025-07-01 05:44:35.619 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:35.628 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:35.635 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:35.641 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:35.646 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:35.651
2025-07-01 05:44:35.657 # search for the pair that matches best without being identical
2025-07-01 05:44:35.662 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:35.668 # on junk -- unless we have to)
2025-07-01 05:44:35.674 for j in range(blo, bhi):
2025-07-01 05:44:35.679 bj = b[j]
2025-07-01 05:44:35.686 cruncher.set_seq2(bj)
2025-07-01 05:44:35.692 for i in range(alo, ahi):
2025-07-01 05:44:35.699 ai = a[i]
2025-07-01 05:44:35.705 if ai == bj:
2025-07-01 05:44:35.712 if eqi is None:
2025-07-01 05:44:35.719 eqi, eqj = i, j
2025-07-01 05:44:35.725 continue
2025-07-01 05:44:35.732 cruncher.set_seq1(ai)
2025-07-01 05:44:35.739 # computing similarity is expensive, so use the quick
2025-07-01 05:44:35.747 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:35.760 # compares by a factor of 3.
2025-07-01 05:44:35.769 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:35.776 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:35.783 # of the computation is cached by cruncher
2025-07-01 05:44:35.794 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:35.807 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:35.816 cruncher.ratio() > best_ratio:
2025-07-01 05:44:35.824 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:35.830 if best_ratio < cutoff:
2025-07-01 05:44:35.837 # no non-identical "pretty close" pair
2025-07-01 05:44:35.842 if eqi is None:
2025-07-01 05:44:35.848 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:35.855 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:35.868 return
2025-07-01 05:44:35.874 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:35.883 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:35.890 else:
2025-07-01 05:44:35.897 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:35.903 eqi = None
2025-07-01 05:44:35.908
2025-07-01 05:44:35.914 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:35.920 # identical
2025-07-01 05:44:35.929
2025-07-01 05:44:35.939 # pump out diffs from before the synch point
2025-07-01 05:44:35.949 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:35.961
2025-07-01 05:44:35.971 # do intraline marking on the synch pair
2025-07-01 05:44:35.979 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:35.986 if eqi is None:
2025-07-01 05:44:35.996 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:36.006 atags = btags = ""
2025-07-01 05:44:36.014 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:36.022 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:36.033 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:36.043 if tag == 'replace':
2025-07-01 05:44:36.051 atags += '^' * la
2025-07-01 05:44:36.059 btags += '^' * lb
2025-07-01 05:44:36.070 elif tag == 'delete':
2025-07-01 05:44:36.079 atags += '-' * la
2025-07-01 05:44:36.086 elif tag == 'insert':
2025-07-01 05:44:36.097 btags += '+' * lb
2025-07-01 05:44:36.106 elif tag == 'equal':
2025-07-01 05:44:36.119 atags += ' ' * la
2025-07-01 05:44:36.128 btags += ' ' * lb
2025-07-01 05:44:36.137 else:
2025-07-01 05:44:36.144 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:36.150 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:36.156 else:
2025-07-01 05:44:36.168 # the synch pair is identical
2025-07-01 05:44:36.179 yield ' ' + aelt
2025-07-01 05:44:36.186
2025-07-01 05:44:36.194 # pump out diffs from after the synch point
2025-07-01 05:44:36.203 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:36.213
2025-07-01 05:44:36.222 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:36.231 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:36.240
2025-07-01 05:44:36.247 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:36.253 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:36.258 alo = 35, ahi = 1101
2025-07-01 05:44:36.266 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:36.276 blo = 35, bhi = 1101
2025-07-01 05:44:36.285
2025-07-01 05:44:36.298 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:36.310 g = []
2025-07-01 05:44:36.322 if alo < ahi:
2025-07-01 05:44:36.330 if blo < bhi:
2025-07-01 05:44:36.336 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:36.342 else:
2025-07-01 05:44:36.347 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:36.352 elif blo < bhi:
2025-07-01 05:44:36.357 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:36.365
2025-07-01 05:44:36.373 > yield from g
2025-07-01 05:44:36.380
2025-07-01 05:44:36.389 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:36.396 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:36.402
2025-07-01 05:44:36.414 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:36.426 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:36.436 alo = 35, ahi = 1101
2025-07-01 05:44:36.444 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:36.449 blo = 35, bhi = 1101
2025-07-01 05:44:36.453
2025-07-01 05:44:36.459 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:36.470 r"""
2025-07-01 05:44:36.480 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:36.489 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:36.497 synch point, and intraline difference marking is done on the
2025-07-01 05:44:36.504 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:36.510
2025-07-01 05:44:36.516 Example:
2025-07-01 05:44:36.522
2025-07-01 05:44:36.533 >>> d = Differ()
2025-07-01 05:44:36.541 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:36.548 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:36.554 >>> print(''.join(results), end="")
2025-07-01 05:44:36.559 - abcDefghiJkl
2025-07-01 05:44:36.568 + abcdefGhijkl
2025-07-01 05:44:36.578 """
2025-07-01 05:44:36.587
2025-07-01 05:44:36.595 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:36.602 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:36.611 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:36.621 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:36.628 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:36.633
2025-07-01 05:44:36.639 # search for the pair that matches best without being identical
2025-07-01 05:44:36.644 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:36.649 # on junk -- unless we have to)
2025-07-01 05:44:36.660 for j in range(blo, bhi):
2025-07-01 05:44:36.667 bj = b[j]
2025-07-01 05:44:36.673 cruncher.set_seq2(bj)
2025-07-01 05:44:36.679 for i in range(alo, ahi):
2025-07-01 05:44:36.686 ai = a[i]
2025-07-01 05:44:36.698 if ai == bj:
2025-07-01 05:44:36.705 if eqi is None:
2025-07-01 05:44:36.712 eqi, eqj = i, j
2025-07-01 05:44:36.718 continue
2025-07-01 05:44:36.729 cruncher.set_seq1(ai)
2025-07-01 05:44:36.739 # computing similarity is expensive, so use the quick
2025-07-01 05:44:36.745 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:36.751 # compares by a factor of 3.
2025-07-01 05:44:36.756 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:36.762 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:36.772 # of the computation is cached by cruncher
2025-07-01 05:44:36.783 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:36.797 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:36.806 cruncher.ratio() > best_ratio:
2025-07-01 05:44:36.815 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:36.826 if best_ratio < cutoff:
2025-07-01 05:44:36.834 # no non-identical "pretty close" pair
2025-07-01 05:44:36.841 if eqi is None:
2025-07-01 05:44:36.846 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:36.851 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:36.856 return
2025-07-01 05:44:36.862 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:36.868 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:36.873 else:
2025-07-01 05:44:36.879 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:36.886 eqi = None
2025-07-01 05:44:36.895
2025-07-01 05:44:36.904 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:36.911 # identical
2025-07-01 05:44:36.921
2025-07-01 05:44:36.927 # pump out diffs from before the synch point
2025-07-01 05:44:36.933 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:36.939
2025-07-01 05:44:36.946 # do intraline marking on the synch pair
2025-07-01 05:44:36.953 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:36.965 if eqi is None:
2025-07-01 05:44:36.976 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:36.983 atags = btags = ""
2025-07-01 05:44:36.990 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:36.995 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:37.001 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:37.008 if tag == 'replace':
2025-07-01 05:44:37.015 atags += '^' * la
2025-07-01 05:44:37.021 btags += '^' * lb
2025-07-01 05:44:37.027 elif tag == 'delete':
2025-07-01 05:44:37.032 atags += '-' * la
2025-07-01 05:44:37.038 elif tag == 'insert':
2025-07-01 05:44:37.049 btags += '+' * lb
2025-07-01 05:44:37.057 elif tag == 'equal':
2025-07-01 05:44:37.064 atags += ' ' * la
2025-07-01 05:44:37.070 btags += ' ' * lb
2025-07-01 05:44:37.079 else:
2025-07-01 05:44:37.088 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:37.095 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:37.101 else:
2025-07-01 05:44:37.106 # the synch pair is identical
2025-07-01 05:44:37.114 yield ' ' + aelt
2025-07-01 05:44:37.124
2025-07-01 05:44:37.132 # pump out diffs from after the synch point
2025-07-01 05:44:37.139 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:37.147
2025-07-01 05:44:37.157 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:37.166 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:37.174
2025-07-01 05:44:37.185 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:37.195 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:37.207 alo = 36, ahi = 1101
2025-07-01 05:44:37.219 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:37.228 blo = 36, bhi = 1101
2025-07-01 05:44:37.241
2025-07-01 05:44:37.251 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:37.259 g = []
2025-07-01 05:44:37.266 if alo < ahi:
2025-07-01 05:44:37.276 if blo < bhi:
2025-07-01 05:44:37.287 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:37.295 else:
2025-07-01 05:44:37.303 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:37.310 elif blo < bhi:
2025-07-01 05:44:37.320 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:37.330
2025-07-01 05:44:37.341 > yield from g
2025-07-01 05:44:37.352
2025-07-01 05:44:37.363 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:37.375 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:37.387
2025-07-01 05:44:37.397 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:37.409 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:37.421 alo = 36, ahi = 1101
2025-07-01 05:44:37.433 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:37.446 blo = 36, bhi = 1101
2025-07-01 05:44:37.458
2025-07-01 05:44:37.473 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:37.480 r"""
2025-07-01 05:44:37.492 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:37.505 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:37.515 synch point, and intraline difference marking is done on the
2025-07-01 05:44:37.522 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:37.528
2025-07-01 05:44:37.533 Example:
2025-07-01 05:44:37.541
2025-07-01 05:44:37.555 >>> d = Differ()
2025-07-01 05:44:37.567 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:37.576 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:37.583 >>> print(''.join(results), end="")
2025-07-01 05:44:37.589 - abcDefghiJkl
2025-07-01 05:44:37.600 + abcdefGhijkl
2025-07-01 05:44:37.611 """
2025-07-01 05:44:37.623
2025-07-01 05:44:37.635 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:37.646 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:37.657 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:37.667 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:37.679 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:37.688
2025-07-01 05:44:37.695 # search for the pair that matches best without being identical
2025-07-01 05:44:37.705 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:37.717 # on junk -- unless we have to)
2025-07-01 05:44:37.728 for j in range(blo, bhi):
2025-07-01 05:44:37.739 bj = b[j]
2025-07-01 05:44:37.749 cruncher.set_seq2(bj)
2025-07-01 05:44:37.756 for i in range(alo, ahi):
2025-07-01 05:44:37.763 ai = a[i]
2025-07-01 05:44:37.771 if ai == bj:
2025-07-01 05:44:37.782 if eqi is None:
2025-07-01 05:44:37.792 eqi, eqj = i, j
2025-07-01 05:44:37.800 continue
2025-07-01 05:44:37.813 cruncher.set_seq1(ai)
2025-07-01 05:44:37.820 # computing similarity is expensive, so use the quick
2025-07-01 05:44:37.827 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:37.833 # compares by a factor of 3.
2025-07-01 05:44:37.840 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:37.847 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:37.855 # of the computation is cached by cruncher
2025-07-01 05:44:37.864 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:37.875 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:37.885 cruncher.ratio() > best_ratio:
2025-07-01 05:44:37.897 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:37.907 if best_ratio < cutoff:
2025-07-01 05:44:37.914 # no non-identical "pretty close" pair
2025-07-01 05:44:37.925 if eqi is None:
2025-07-01 05:44:37.935 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:37.943 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:37.949 return
2025-07-01 05:44:37.955 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:37.961 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:37.967 else:
2025-07-01 05:44:37.982 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:37.993 eqi = None
2025-07-01 05:44:38.001
2025-07-01 05:44:38.008 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:38.015 # identical
2025-07-01 05:44:38.022
2025-07-01 05:44:38.028 # pump out diffs from before the synch point
2025-07-01 05:44:38.034 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:38.040
2025-07-01 05:44:38.046 # do intraline marking on the synch pair
2025-07-01 05:44:38.052 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:38.058 if eqi is None:
2025-07-01 05:44:38.064 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:38.070 atags = btags = ""
2025-07-01 05:44:38.076 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:38.082 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:38.087 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:38.093 if tag == 'replace':
2025-07-01 05:44:38.097 atags += '^' * la
2025-07-01 05:44:38.102 btags += '^' * lb
2025-07-01 05:44:38.107 elif tag == 'delete':
2025-07-01 05:44:38.112 atags += '-' * la
2025-07-01 05:44:38.118 elif tag == 'insert':
2025-07-01 05:44:38.124 btags += '+' * lb
2025-07-01 05:44:38.130 elif tag == 'equal':
2025-07-01 05:44:38.141 atags += ' ' * la
2025-07-01 05:44:38.150 btags += ' ' * lb
2025-07-01 05:44:38.157 else:
2025-07-01 05:44:38.165 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:38.172 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:38.179 else:
2025-07-01 05:44:38.186 # the synch pair is identical
2025-07-01 05:44:38.194 yield ' ' + aelt
2025-07-01 05:44:38.202
2025-07-01 05:44:38.210 # pump out diffs from after the synch point
2025-07-01 05:44:38.216 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:38.224
2025-07-01 05:44:38.235 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:38.244 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:38.251
2025-07-01 05:44:38.261 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:38.275 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:38.281 alo = 37, ahi = 1101
2025-07-01 05:44:38.288 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:38.294 blo = 37, bhi = 1101
2025-07-01 05:44:38.301
2025-07-01 05:44:38.308 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:38.315 g = []
2025-07-01 05:44:38.322 if alo < ahi:
2025-07-01 05:44:38.333 if blo < bhi:
2025-07-01 05:44:38.342 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:38.348 else:
2025-07-01 05:44:38.355 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:38.362 elif blo < bhi:
2025-07-01 05:44:38.368 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:38.375
2025-07-01 05:44:38.381 > yield from g
2025-07-01 05:44:38.388
2025-07-01 05:44:38.395 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:38.402 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:38.414
2025-07-01 05:44:38.420 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:38.428 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:38.435 alo = 37, ahi = 1101
2025-07-01 05:44:38.444 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:38.451 blo = 37, bhi = 1101
2025-07-01 05:44:38.460
2025-07-01 05:44:38.471 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:38.479 r"""
2025-07-01 05:44:38.486 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:38.492 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:38.498 synch point, and intraline difference marking is done on the
2025-07-01 05:44:38.505 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:38.511
2025-07-01 05:44:38.517 Example:
2025-07-01 05:44:38.523
2025-07-01 05:44:38.528 >>> d = Differ()
2025-07-01 05:44:38.534 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:38.540 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:38.546 >>> print(''.join(results), end="")
2025-07-01 05:44:38.553 - abcDefghiJkl
2025-07-01 05:44:38.566 + abcdefGhijkl
2025-07-01 05:44:38.580 """
2025-07-01 05:44:38.586
2025-07-01 05:44:38.593 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:38.601 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:38.608 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:38.616 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:38.624 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:38.631
2025-07-01 05:44:38.640 # search for the pair that matches best without being identical
2025-07-01 05:44:38.648 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:38.656 # on junk -- unless we have to)
2025-07-01 05:44:38.664 for j in range(blo, bhi):
2025-07-01 05:44:38.671 bj = b[j]
2025-07-01 05:44:38.679 cruncher.set_seq2(bj)
2025-07-01 05:44:38.687 for i in range(alo, ahi):
2025-07-01 05:44:38.694 ai = a[i]
2025-07-01 05:44:38.703 if ai == bj:
2025-07-01 05:44:38.718 if eqi is None:
2025-07-01 05:44:38.728 eqi, eqj = i, j
2025-07-01 05:44:38.741 continue
2025-07-01 05:44:38.751 cruncher.set_seq1(ai)
2025-07-01 05:44:38.759 # computing similarity is expensive, so use the quick
2025-07-01 05:44:38.766 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:38.773 # compares by a factor of 3.
2025-07-01 05:44:38.780 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:38.784 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:38.789 # of the computation is cached by cruncher
2025-07-01 05:44:38.794 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:38.798 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:38.803 cruncher.ratio() > best_ratio:
2025-07-01 05:44:38.808 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:38.813 if best_ratio < cutoff:
2025-07-01 05:44:38.819 # no non-identical "pretty close" pair
2025-07-01 05:44:38.825 if eqi is None:
2025-07-01 05:44:38.831 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:38.838 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:38.842 return
2025-07-01 05:44:38.847 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:38.852 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:38.856 else:
2025-07-01 05:44:38.862 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:38.870 eqi = None
2025-07-01 05:44:38.878
2025-07-01 05:44:38.887 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:38.896 # identical
2025-07-01 05:44:38.907
2025-07-01 05:44:38.919 # pump out diffs from before the synch point
2025-07-01 05:44:38.929 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:38.938
2025-07-01 05:44:38.945 # do intraline marking on the synch pair
2025-07-01 05:44:38.951 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:38.959 if eqi is None:
2025-07-01 05:44:38.969 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:38.978 atags = btags = ""
2025-07-01 05:44:38.987 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:38.995 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:39.002 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:39.008 if tag == 'replace':
2025-07-01 05:44:39.014 atags += '^' * la
2025-07-01 05:44:39.020 btags += '^' * lb
2025-07-01 05:44:39.026 elif tag == 'delete':
2025-07-01 05:44:39.036 atags += '-' * la
2025-07-01 05:44:39.045 elif tag == 'insert':
2025-07-01 05:44:39.054 btags += '+' * lb
2025-07-01 05:44:39.064 elif tag == 'equal':
2025-07-01 05:44:39.076 atags += ' ' * la
2025-07-01 05:44:39.086 btags += ' ' * lb
2025-07-01 05:44:39.095 else:
2025-07-01 05:44:39.107 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:39.116 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:39.127 else:
2025-07-01 05:44:39.138 # the synch pair is identical
2025-07-01 05:44:39.148 yield ' ' + aelt
2025-07-01 05:44:39.159
2025-07-01 05:44:39.172 # pump out diffs from after the synch point
2025-07-01 05:44:39.182 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:39.191
2025-07-01 05:44:39.199 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:39.205 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:39.210
2025-07-01 05:44:39.216 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:39.223 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:39.229 alo = 38, ahi = 1101
2025-07-01 05:44:39.236 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:39.242 blo = 38, bhi = 1101
2025-07-01 05:44:39.254
2025-07-01 05:44:39.265 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:39.274 g = []
2025-07-01 05:44:39.282 if alo < ahi:
2025-07-01 05:44:39.291 if blo < bhi:
2025-07-01 05:44:39.298 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:39.304 else:
2025-07-01 05:44:39.310 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:39.315 elif blo < bhi:
2025-07-01 05:44:39.323 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:39.333
2025-07-01 05:44:39.341 > yield from g
2025-07-01 05:44:39.348
2025-07-01 05:44:39.354 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:39.360 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:39.368
2025-07-01 05:44:39.378 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:39.386 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:39.393 alo = 38, ahi = 1101
2025-07-01 05:44:39.399 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:39.406 blo = 38, bhi = 1101
2025-07-01 05:44:39.413
2025-07-01 05:44:39.419 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:39.425 r"""
2025-07-01 05:44:39.437 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:39.444 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:39.450 synch point, and intraline difference marking is done on the
2025-07-01 05:44:39.456 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:39.461
2025-07-01 05:44:39.467 Example:
2025-07-01 05:44:39.473
2025-07-01 05:44:39.482 >>> d = Differ()
2025-07-01 05:44:39.489 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:39.495 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:39.501 >>> print(''.join(results), end="")
2025-07-01 05:44:39.507 - abcDefghiJkl
2025-07-01 05:44:39.518 + abcdefGhijkl
2025-07-01 05:44:39.530 """
2025-07-01 05:44:39.537
2025-07-01 05:44:39.548 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:39.558 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:39.564 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:39.571 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:39.577 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:39.584
2025-07-01 05:44:39.591 # search for the pair that matches best without being identical
2025-07-01 05:44:39.598 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:39.605 # on junk -- unless we have to)
2025-07-01 05:44:39.612 for j in range(blo, bhi):
2025-07-01 05:44:39.619 bj = b[j]
2025-07-01 05:44:39.626 cruncher.set_seq2(bj)
2025-07-01 05:44:39.637 for i in range(alo, ahi):
2025-07-01 05:44:39.646 ai = a[i]
2025-07-01 05:44:39.654 if ai == bj:
2025-07-01 05:44:39.660 if eqi is None:
2025-07-01 05:44:39.667 eqi, eqj = i, j
2025-07-01 05:44:39.674 continue
2025-07-01 05:44:39.683 cruncher.set_seq1(ai)
2025-07-01 05:44:39.694 # computing similarity is expensive, so use the quick
2025-07-01 05:44:39.702 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:39.708 # compares by a factor of 3.
2025-07-01 05:44:39.714 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:39.724 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:39.735 # of the computation is cached by cruncher
2025-07-01 05:44:39.745 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:39.751 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:39.757 cruncher.ratio() > best_ratio:
2025-07-01 05:44:39.764 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:39.770 if best_ratio < cutoff:
2025-07-01 05:44:39.774 # no non-identical "pretty close" pair
2025-07-01 05:44:39.782 if eqi is None:
2025-07-01 05:44:39.792 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:39.799 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:39.805 return
2025-07-01 05:44:39.812 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:39.818 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:39.825 else:
2025-07-01 05:44:39.832 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:39.839 eqi = None
2025-07-01 05:44:39.847
2025-07-01 05:44:39.860 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:39.870 # identical
2025-07-01 05:44:39.878
2025-07-01 05:44:39.884 # pump out diffs from before the synch point
2025-07-01 05:44:39.891 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:39.898
2025-07-01 05:44:39.908 # do intraline marking on the synch pair
2025-07-01 05:44:39.917 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:39.924 if eqi is None:
2025-07-01 05:44:39.930 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:39.934 atags = btags = ""
2025-07-01 05:44:39.940 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:39.946 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:39.952 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:39.958 if tag == 'replace':
2025-07-01 05:44:39.966 atags += '^' * la
2025-07-01 05:44:39.973 btags += '^' * lb
2025-07-01 05:44:39.979 elif tag == 'delete':
2025-07-01 05:44:39.984 atags += '-' * la
2025-07-01 05:44:39.990 elif tag == 'insert':
2025-07-01 05:44:39.994 btags += '+' * lb
2025-07-01 05:44:39.999 elif tag == 'equal':
2025-07-01 05:44:40.006 atags += ' ' * la
2025-07-01 05:44:40.013 btags += ' ' * lb
2025-07-01 05:44:40.019 else:
2025-07-01 05:44:40.029 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:40.041 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:40.049 else:
2025-07-01 05:44:40.055 # the synch pair is identical
2025-07-01 05:44:40.061 yield ' ' + aelt
2025-07-01 05:44:40.065
2025-07-01 05:44:40.069 # pump out diffs from after the synch point
2025-07-01 05:44:40.074 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:40.078
2025-07-01 05:44:40.083 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:40.087 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:40.092
2025-07-01 05:44:40.097 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:40.106 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:40.111 alo = 39, ahi = 1101
2025-07-01 05:44:40.118 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:40.125 blo = 39, bhi = 1101
2025-07-01 05:44:40.132
2025-07-01 05:44:40.138 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:40.144 g = []
2025-07-01 05:44:40.155 if alo < ahi:
2025-07-01 05:44:40.165 if blo < bhi:
2025-07-01 05:44:40.175 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:40.185 else:
2025-07-01 05:44:40.193 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:40.199 elif blo < bhi:
2025-07-01 05:44:40.204 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:40.209
2025-07-01 05:44:40.214 > yield from g
2025-07-01 05:44:40.218
2025-07-01 05:44:40.223 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:40.229 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:40.234
2025-07-01 05:44:40.243 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:40.256 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:40.266 alo = 39, ahi = 1101
2025-07-01 05:44:40.276 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:40.282 blo = 39, bhi = 1101
2025-07-01 05:44:40.293
2025-07-01 05:44:40.301 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:40.307 r"""
2025-07-01 05:44:40.314 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:40.324 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:40.333 synch point, and intraline difference marking is done on the
2025-07-01 05:44:40.340 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:40.346
2025-07-01 05:44:40.352 Example:
2025-07-01 05:44:40.357
2025-07-01 05:44:40.363 >>> d = Differ()
2025-07-01 05:44:40.369 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:40.375 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:40.382 >>> print(''.join(results), end="")
2025-07-01 05:44:40.392 - abcDefghiJkl
2025-07-01 05:44:40.408 + abcdefGhijkl
2025-07-01 05:44:40.419 """
2025-07-01 05:44:40.425
2025-07-01 05:44:40.431 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:40.437 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:40.443 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:40.450 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:40.459 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:40.468
2025-07-01 05:44:40.474 # search for the pair that matches best without being identical
2025-07-01 05:44:40.481 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:40.487 # on junk -- unless we have to)
2025-07-01 05:44:40.495 for j in range(blo, bhi):
2025-07-01 05:44:40.506 bj = b[j]
2025-07-01 05:44:40.517 cruncher.set_seq2(bj)
2025-07-01 05:44:40.526 for i in range(alo, ahi):
2025-07-01 05:44:40.532 ai = a[i]
2025-07-01 05:44:40.538 if ai == bj:
2025-07-01 05:44:40.544 if eqi is None:
2025-07-01 05:44:40.551 eqi, eqj = i, j
2025-07-01 05:44:40.557 continue
2025-07-01 05:44:40.566 cruncher.set_seq1(ai)
2025-07-01 05:44:40.579 # computing similarity is expensive, so use the quick
2025-07-01 05:44:40.590 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:40.599 # compares by a factor of 3.
2025-07-01 05:44:40.607 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:40.614 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:40.620 # of the computation is cached by cruncher
2025-07-01 05:44:40.627 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:40.632 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:40.638 cruncher.ratio() > best_ratio:
2025-07-01 05:44:40.650 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:40.660 if best_ratio < cutoff:
2025-07-01 05:44:40.668 # no non-identical "pretty close" pair
2025-07-01 05:44:40.675 if eqi is None:
2025-07-01 05:44:40.681 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:40.692 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:40.702 return
2025-07-01 05:44:40.709 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:40.716 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:40.723 else:
2025-07-01 05:44:40.733 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:40.742 eqi = None
2025-07-01 05:44:40.750
2025-07-01 05:44:40.761 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:40.771 # identical
2025-07-01 05:44:40.777
2025-07-01 05:44:40.783 # pump out diffs from before the synch point
2025-07-01 05:44:40.794 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:40.802
2025-07-01 05:44:40.810 # do intraline marking on the synch pair
2025-07-01 05:44:40.815 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:40.820 if eqi is None:
2025-07-01 05:44:40.825 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:40.829 atags = btags = ""
2025-07-01 05:44:40.833 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:40.838 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:40.842 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:40.847 if tag == 'replace':
2025-07-01 05:44:40.851 atags += '^' * la
2025-07-01 05:44:40.855 btags += '^' * lb
2025-07-01 05:44:40.860 elif tag == 'delete':
2025-07-01 05:44:40.865 atags += '-' * la
2025-07-01 05:44:40.872 elif tag == 'insert':
2025-07-01 05:44:40.877 btags += '+' * lb
2025-07-01 05:44:40.882 elif tag == 'equal':
2025-07-01 05:44:40.891 atags += ' ' * la
2025-07-01 05:44:40.897 btags += ' ' * lb
2025-07-01 05:44:40.902 else:
2025-07-01 05:44:40.909 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:40.914 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:40.919 else:
2025-07-01 05:44:40.926 # the synch pair is identical
2025-07-01 05:44:40.931 yield ' ' + aelt
2025-07-01 05:44:40.942
2025-07-01 05:44:40.952 # pump out diffs from after the synch point
2025-07-01 05:44:40.962 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:40.971
2025-07-01 05:44:40.978 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:40.987 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:40.997
2025-07-01 05:44:41.011 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:41.022 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:41.033 alo = 40, ahi = 1101
2025-07-01 05:44:41.045 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:41.053 blo = 40, bhi = 1101
2025-07-01 05:44:41.061
2025-07-01 05:44:41.072 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:41.081 g = []
2025-07-01 05:44:41.089 if alo < ahi:
2025-07-01 05:44:41.096 if blo < bhi:
2025-07-01 05:44:41.103 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:41.108 else:
2025-07-01 05:44:41.114 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:41.124 elif blo < bhi:
2025-07-01 05:44:41.136 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:41.145
2025-07-01 05:44:41.150 > yield from g
2025-07-01 05:44:41.157
2025-07-01 05:44:41.164 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:41.169 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:41.174
2025-07-01 05:44:41.182 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:41.190 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:41.197 alo = 40, ahi = 1101
2025-07-01 05:44:41.206 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:41.215 blo = 40, bhi = 1101
2025-07-01 05:44:41.221
2025-07-01 05:44:41.231 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:41.238 r"""
2025-07-01 05:44:41.245 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:41.252 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:41.259 synch point, and intraline difference marking is done on the
2025-07-01 05:44:41.266 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:41.278
2025-07-01 05:44:41.287 Example:
2025-07-01 05:44:41.294
2025-07-01 05:44:41.300 >>> d = Differ()
2025-07-01 05:44:41.307 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:41.316 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:41.323 >>> print(''.join(results), end="")
2025-07-01 05:44:41.330 - abcDefghiJkl
2025-07-01 05:44:41.345 + abcdefGhijkl
2025-07-01 05:44:41.358 """
2025-07-01 05:44:41.363
2025-07-01 05:44:41.370 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:41.376 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:41.382 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:41.389 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:41.396 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:41.406
2025-07-01 05:44:41.414 # search for the pair that matches best without being identical
2025-07-01 05:44:41.421 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:41.432 # on junk -- unless we have to)
2025-07-01 05:44:41.443 for j in range(blo, bhi):
2025-07-01 05:44:41.451 bj = b[j]
2025-07-01 05:44:41.458 cruncher.set_seq2(bj)
2025-07-01 05:44:41.463 for i in range(alo, ahi):
2025-07-01 05:44:41.469 ai = a[i]
2025-07-01 05:44:41.475 if ai == bj:
2025-07-01 05:44:41.486 if eqi is None:
2025-07-01 05:44:41.495 eqi, eqj = i, j
2025-07-01 05:44:41.501 continue
2025-07-01 05:44:41.507 cruncher.set_seq1(ai)
2025-07-01 05:44:41.513 # computing similarity is expensive, so use the quick
2025-07-01 05:44:41.519 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:41.526 # compares by a factor of 3.
2025-07-01 05:44:41.537 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:41.543 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:41.550 # of the computation is cached by cruncher
2025-07-01 05:44:41.557 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:41.562 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:41.567 cruncher.ratio() > best_ratio:
2025-07-01 05:44:41.573 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:41.580 if best_ratio < cutoff:
2025-07-01 05:44:41.586 # no non-identical "pretty close" pair
2025-07-01 05:44:41.597 if eqi is None:
2025-07-01 05:44:41.604 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:41.611 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:41.617 return
2025-07-01 05:44:41.622 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:41.633 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:41.643 else:
2025-07-01 05:44:41.649 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:41.656 eqi = None
2025-07-01 05:44:41.663
2025-07-01 05:44:41.669 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:41.675 # identical
2025-07-01 05:44:41.680
2025-07-01 05:44:41.685 # pump out diffs from before the synch point
2025-07-01 05:44:41.690 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:41.695
2025-07-01 05:44:41.701 # do intraline marking on the synch pair
2025-07-01 05:44:41.706 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:41.711 if eqi is None:
2025-07-01 05:44:41.718 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:41.730 atags = btags = ""
2025-07-01 05:44:41.739 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:41.746 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:41.756 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:41.768 if tag == 'replace':
2025-07-01 05:44:41.779 atags += '^' * la
2025-07-01 05:44:41.791 btags += '^' * lb
2025-07-01 05:44:41.801 elif tag == 'delete':
2025-07-01 05:44:41.808 atags += '-' * la
2025-07-01 05:44:41.815 elif tag == 'insert':
2025-07-01 05:44:41.822 btags += '+' * lb
2025-07-01 05:44:41.833 elif tag == 'equal':
2025-07-01 05:44:41.843 atags += ' ' * la
2025-07-01 05:44:41.851 btags += ' ' * lb
2025-07-01 05:44:41.857 else:
2025-07-01 05:44:41.862 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:41.868 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:41.874 else:
2025-07-01 05:44:41.880 # the synch pair is identical
2025-07-01 05:44:41.887 yield ' ' + aelt
2025-07-01 05:44:41.894
2025-07-01 05:44:41.903 # pump out diffs from after the synch point
2025-07-01 05:44:41.915 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:41.925
2025-07-01 05:44:41.936 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:41.946 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:41.953
2025-07-01 05:44:41.960 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:41.969 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:41.975 alo = 41, ahi = 1101
2025-07-01 05:44:41.982 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:41.988 blo = 41, bhi = 1101
2025-07-01 05:44:41.995
2025-07-01 05:44:42.006 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:42.017 g = []
2025-07-01 05:44:42.027 if alo < ahi:
2025-07-01 05:44:42.036 if blo < bhi:
2025-07-01 05:44:42.048 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:42.059 else:
2025-07-01 05:44:42.068 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:42.075 elif blo < bhi:
2025-07-01 05:44:42.082 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:42.090
2025-07-01 05:44:42.102 > yield from g
2025-07-01 05:44:42.114
2025-07-01 05:44:42.125 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:42.137 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:42.150
2025-07-01 05:44:42.162 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:42.172 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:42.182 alo = 41, ahi = 1101
2025-07-01 05:44:42.191 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:42.198 blo = 41, bhi = 1101
2025-07-01 05:44:42.204
2025-07-01 05:44:42.211 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:42.219 r"""
2025-07-01 05:44:42.230 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:42.240 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:42.248 synch point, and intraline difference marking is done on the
2025-07-01 05:44:42.255 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:42.263
2025-07-01 05:44:42.275 Example:
2025-07-01 05:44:42.283
2025-07-01 05:44:42.290 >>> d = Differ()
2025-07-01 05:44:42.296 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:42.302 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:42.308 >>> print(''.join(results), end="")
2025-07-01 05:44:42.315 - abcDefghiJkl
2025-07-01 05:44:42.331 + abcdefGhijkl
2025-07-01 05:44:42.356 """
2025-07-01 05:44:42.367
2025-07-01 05:44:42.376 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:42.389 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:42.394 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:42.399 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:42.405 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:42.410
2025-07-01 05:44:42.416 # search for the pair that matches best without being identical
2025-07-01 05:44:42.422 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:42.428 # on junk -- unless we have to)
2025-07-01 05:44:42.433 for j in range(blo, bhi):
2025-07-01 05:44:42.439 bj = b[j]
2025-07-01 05:44:42.445 cruncher.set_seq2(bj)
2025-07-01 05:44:42.451 for i in range(alo, ahi):
2025-07-01 05:44:42.456 ai = a[i]
2025-07-01 05:44:42.462 if ai == bj:
2025-07-01 05:44:42.472 if eqi is None:
2025-07-01 05:44:42.483 eqi, eqj = i, j
2025-07-01 05:44:42.490 continue
2025-07-01 05:44:42.498 cruncher.set_seq1(ai)
2025-07-01 05:44:42.509 # computing similarity is expensive, so use the quick
2025-07-01 05:44:42.518 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:42.529 # compares by a factor of 3.
2025-07-01 05:44:42.541 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:42.553 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:42.564 # of the computation is cached by cruncher
2025-07-01 05:44:42.577 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:42.588 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:42.597 cruncher.ratio() > best_ratio:
2025-07-01 05:44:42.604 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:42.611 if best_ratio < cutoff:
2025-07-01 05:44:42.618 # no non-identical "pretty close" pair
2025-07-01 05:44:42.629 if eqi is None:
2025-07-01 05:44:42.640 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:42.651 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:42.661 return
2025-07-01 05:44:42.672 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:42.681 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:42.690 else:
2025-07-01 05:44:42.702 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:42.714 eqi = None
2025-07-01 05:44:42.724
2025-07-01 05:44:42.732 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:42.739 # identical
2025-07-01 05:44:42.747
2025-07-01 05:44:42.758 # pump out diffs from before the synch point
2025-07-01 05:44:42.766 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:42.773
2025-07-01 05:44:42.781 # do intraline marking on the synch pair
2025-07-01 05:44:42.793 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:42.803 if eqi is None:
2025-07-01 05:44:42.816 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:42.827 atags = btags = ""
2025-07-01 05:44:42.835 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:42.841 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:42.846 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:42.853 if tag == 'replace':
2025-07-01 05:44:42.860 atags += '^' * la
2025-07-01 05:44:42.866 btags += '^' * lb
2025-07-01 05:44:42.872 elif tag == 'delete':
2025-07-01 05:44:42.877 atags += '-' * la
2025-07-01 05:44:42.883 elif tag == 'insert':
2025-07-01 05:44:42.889 btags += '+' * lb
2025-07-01 05:44:42.894 elif tag == 'equal':
2025-07-01 05:44:42.905 atags += ' ' * la
2025-07-01 05:44:42.914 btags += ' ' * lb
2025-07-01 05:44:42.921 else:
2025-07-01 05:44:42.932 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:42.945 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:42.954 else:
2025-07-01 05:44:42.961 # the synch pair is identical
2025-07-01 05:44:42.967 yield ' ' + aelt
2025-07-01 05:44:42.974
2025-07-01 05:44:42.980 # pump out diffs from after the synch point
2025-07-01 05:44:42.987 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:42.995
2025-07-01 05:44:43.008 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:43.016 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:43.022
2025-07-01 05:44:43.028 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:43.036 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:43.043 alo = 42, ahi = 1101
2025-07-01 05:44:43.051 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:43.059 blo = 42, bhi = 1101
2025-07-01 05:44:43.070
2025-07-01 05:44:43.078 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:43.084 g = []
2025-07-01 05:44:43.090 if alo < ahi:
2025-07-01 05:44:43.097 if blo < bhi:
2025-07-01 05:44:43.103 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:43.110 else:
2025-07-01 05:44:43.117 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:43.123 elif blo < bhi:
2025-07-01 05:44:43.129 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:43.134
2025-07-01 05:44:43.140 > yield from g
2025-07-01 05:44:43.146
2025-07-01 05:44:43.152 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:43.158 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:43.163
2025-07-01 05:44:43.169 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:43.175 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:43.181 alo = 42, ahi = 1101
2025-07-01 05:44:43.187 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:43.193 blo = 42, bhi = 1101
2025-07-01 05:44:43.198
2025-07-01 05:44:43.204 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:43.210 r"""
2025-07-01 05:44:43.221 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:43.229 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:43.235 synch point, and intraline difference marking is done on the
2025-07-01 05:44:43.241 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:43.246
2025-07-01 05:44:43.252 Example:
2025-07-01 05:44:43.258
2025-07-01 05:44:43.264 >>> d = Differ()
2025-07-01 05:44:43.270 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:43.276 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:43.281 >>> print(''.join(results), end="")
2025-07-01 05:44:43.287 - abcDefghiJkl
2025-07-01 05:44:43.298 + abcdefGhijkl
2025-07-01 05:44:43.310 """
2025-07-01 05:44:43.315
2025-07-01 05:44:43.321 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:43.327 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:43.332 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:43.338 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:43.344 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:43.350
2025-07-01 05:44:43.356 # search for the pair that matches best without being identical
2025-07-01 05:44:43.361 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:43.367 # on junk -- unless we have to)
2025-07-01 05:44:43.373 for j in range(blo, bhi):
2025-07-01 05:44:43.378 bj = b[j]
2025-07-01 05:44:43.384 cruncher.set_seq2(bj)
2025-07-01 05:44:43.390 for i in range(alo, ahi):
2025-07-01 05:44:43.395 ai = a[i]
2025-07-01 05:44:43.401 if ai == bj:
2025-07-01 05:44:43.407 if eqi is None:
2025-07-01 05:44:43.413 eqi, eqj = i, j
2025-07-01 05:44:43.418 continue
2025-07-01 05:44:43.424 cruncher.set_seq1(ai)
2025-07-01 05:44:43.430 # computing similarity is expensive, so use the quick
2025-07-01 05:44:43.436 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:43.442 # compares by a factor of 3.
2025-07-01 05:44:43.447 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:43.453 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:43.459 # of the computation is cached by cruncher
2025-07-01 05:44:43.465 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:43.471 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:43.477 cruncher.ratio() > best_ratio:
2025-07-01 05:44:43.483 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:43.489 if best_ratio < cutoff:
2025-07-01 05:44:43.495 # no non-identical "pretty close" pair
2025-07-01 05:44:43.501 if eqi is None:
2025-07-01 05:44:43.507 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:43.513 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:43.518 return
2025-07-01 05:44:43.524 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:43.530 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:43.536 else:
2025-07-01 05:44:43.542 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:43.548 eqi = None
2025-07-01 05:44:43.553
2025-07-01 05:44:43.559 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:43.565 # identical
2025-07-01 05:44:43.571
2025-07-01 05:44:43.577 # pump out diffs from before the synch point
2025-07-01 05:44:43.583 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:43.588
2025-07-01 05:44:43.594 # do intraline marking on the synch pair
2025-07-01 05:44:43.600 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:43.606 if eqi is None:
2025-07-01 05:44:43.611 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:43.617 atags = btags = ""
2025-07-01 05:44:43.623 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:43.629 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:43.634 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:43.640 if tag == 'replace':
2025-07-01 05:44:43.646 atags += '^' * la
2025-07-01 05:44:43.651 btags += '^' * lb
2025-07-01 05:44:43.657 elif tag == 'delete':
2025-07-01 05:44:43.663 atags += '-' * la
2025-07-01 05:44:43.669 elif tag == 'insert':
2025-07-01 05:44:43.674 btags += '+' * lb
2025-07-01 05:44:43.680 elif tag == 'equal':
2025-07-01 05:44:43.686 atags += ' ' * la
2025-07-01 05:44:43.692 btags += ' ' * lb
2025-07-01 05:44:43.697 else:
2025-07-01 05:44:43.703 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:43.709 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:43.715 else:
2025-07-01 05:44:43.720 # the synch pair is identical
2025-07-01 05:44:43.726 yield ' ' + aelt
2025-07-01 05:44:43.732
2025-07-01 05:44:43.738 # pump out diffs from after the synch point
2025-07-01 05:44:43.743 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:43.750
2025-07-01 05:44:43.757 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:43.763 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:43.769
2025-07-01 05:44:43.774 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:43.780 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:43.785 alo = 43, ahi = 1101
2025-07-01 05:44:43.790 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:43.799 blo = 43, bhi = 1101
2025-07-01 05:44:43.809
2025-07-01 05:44:43.817 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:43.825 g = []
2025-07-01 05:44:43.831 if alo < ahi:
2025-07-01 05:44:43.836 if blo < bhi:
2025-07-01 05:44:43.842 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:43.849 else:
2025-07-01 05:44:43.855 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:43.861 elif blo < bhi:
2025-07-01 05:44:43.867 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:43.874
2025-07-01 05:44:43.880 > yield from g
2025-07-01 05:44:43.887
2025-07-01 05:44:43.894 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:43.905 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:43.916
2025-07-01 05:44:43.927 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:43.936 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:43.942 alo = 43, ahi = 1101
2025-07-01 05:44:43.951 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:43.958 blo = 43, bhi = 1101
2025-07-01 05:44:43.965
2025-07-01 05:44:43.973 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:43.980 r"""
2025-07-01 05:44:43.987 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:43.995 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:44.003 synch point, and intraline difference marking is done on the
2025-07-01 05:44:44.010 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:44.017
2025-07-01 05:44:44.023 Example:
2025-07-01 05:44:44.031
2025-07-01 05:44:44.042 >>> d = Differ()
2025-07-01 05:44:44.049 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:44.055 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:44.062 >>> print(''.join(results), end="")
2025-07-01 05:44:44.073 - abcDefghiJkl
2025-07-01 05:44:44.090 + abcdefGhijkl
2025-07-01 05:44:44.101 """
2025-07-01 05:44:44.106
2025-07-01 05:44:44.112 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:44.118 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:44.124 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:44.130 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:44.136 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:44.142
2025-07-01 05:44:44.148 # search for the pair that matches best without being identical
2025-07-01 05:44:44.154 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:44.160 # on junk -- unless we have to)
2025-07-01 05:44:44.166 for j in range(blo, bhi):
2025-07-01 05:44:44.171 bj = b[j]
2025-07-01 05:44:44.179 cruncher.set_seq2(bj)
2025-07-01 05:44:44.186 for i in range(alo, ahi):
2025-07-01 05:44:44.191 ai = a[i]
2025-07-01 05:44:44.200 if ai == bj:
2025-07-01 05:44:44.213 if eqi is None:
2025-07-01 05:44:44.220 eqi, eqj = i, j
2025-07-01 05:44:44.227 continue
2025-07-01 05:44:44.235 cruncher.set_seq1(ai)
2025-07-01 05:44:44.246 # computing similarity is expensive, so use the quick
2025-07-01 05:44:44.255 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:44.263 # compares by a factor of 3.
2025-07-01 05:44:44.270 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:44.276 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:44.281 # of the computation is cached by cruncher
2025-07-01 05:44:44.287 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:44.295 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:44.304 cruncher.ratio() > best_ratio:
2025-07-01 05:44:44.313 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:44.320 if best_ratio < cutoff:
2025-07-01 05:44:44.326 # no non-identical "pretty close" pair
2025-07-01 05:44:44.338 if eqi is None:
2025-07-01 05:44:44.348 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:44.359 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:44.368 return
2025-07-01 05:44:44.376 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:44.384 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:44.390 else:
2025-07-01 05:44:44.403 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:44.414 eqi = None
2025-07-01 05:44:44.426
2025-07-01 05:44:44.434 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:44.441 # identical
2025-07-01 05:44:44.447
2025-07-01 05:44:44.454 # pump out diffs from before the synch point
2025-07-01 05:44:44.460 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:44.466
2025-07-01 05:44:44.472 # do intraline marking on the synch pair
2025-07-01 05:44:44.478 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:44.486 if eqi is None:
2025-07-01 05:44:44.494 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:44.500 atags = btags = ""
2025-07-01 05:44:44.507 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:44.518 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:44.525 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:44.532 if tag == 'replace':
2025-07-01 05:44:44.540 atags += '^' * la
2025-07-01 05:44:44.546 btags += '^' * lb
2025-07-01 05:44:44.552 elif tag == 'delete':
2025-07-01 05:44:44.558 atags += '-' * la
2025-07-01 05:44:44.564 elif tag == 'insert':
2025-07-01 05:44:44.569 btags += '+' * lb
2025-07-01 05:44:44.577 elif tag == 'equal':
2025-07-01 05:44:44.586 atags += ' ' * la
2025-07-01 05:44:44.593 btags += ' ' * lb
2025-07-01 05:44:44.599 else:
2025-07-01 05:44:44.606 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:44.612 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:44.619 else:
2025-07-01 05:44:44.632 # the synch pair is identical
2025-07-01 05:44:44.642 yield ' ' + aelt
2025-07-01 05:44:44.654
2025-07-01 05:44:44.663 # pump out diffs from after the synch point
2025-07-01 05:44:44.670 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:44.675
2025-07-01 05:44:44.682 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:44.690 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:44.695
2025-07-01 05:44:44.706 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:44.713 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:44.719 alo = 44, ahi = 1101
2025-07-01 05:44:44.726 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:44.733 blo = 44, bhi = 1101
2025-07-01 05:44:44.739
2025-07-01 05:44:44.752 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:44.762 g = []
2025-07-01 05:44:44.769 if alo < ahi:
2025-07-01 05:44:44.776 if blo < bhi:
2025-07-01 05:44:44.781 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:44.787 else:
2025-07-01 05:44:44.795 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:44.801 elif blo < bhi:
2025-07-01 05:44:44.807 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:44.813
2025-07-01 05:44:44.820 > yield from g
2025-07-01 05:44:44.826
2025-07-01 05:44:44.831 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:44.835 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:44.843
2025-07-01 05:44:44.853 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:44.860 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:44.868 alo = 44, ahi = 1101
2025-07-01 05:44:44.874 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:44.880 blo = 44, bhi = 1101
2025-07-01 05:44:44.886
2025-07-01 05:44:44.893 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:44.898 r"""
2025-07-01 05:44:44.907 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:44.918 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:44.925 synch point, and intraline difference marking is done on the
2025-07-01 05:44:44.932 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:44.938
2025-07-01 05:44:44.945 Example:
2025-07-01 05:44:44.950
2025-07-01 05:44:44.957 >>> d = Differ()
2025-07-01 05:44:44.964 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:44.971 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:44.978 >>> print(''.join(results), end="")
2025-07-01 05:44:44.988 - abcDefghiJkl
2025-07-01 05:44:45.006 + abcdefGhijkl
2025-07-01 05:44:45.021 """
2025-07-01 05:44:45.033
2025-07-01 05:44:45.043 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:45.050 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:45.056 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:45.061 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:45.067 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:45.073
2025-07-01 05:44:45.079 # search for the pair that matches best without being identical
2025-07-01 05:44:45.086 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:45.092 # on junk -- unless we have to)
2025-07-01 05:44:45.099 for j in range(blo, bhi):
2025-07-01 05:44:45.111 bj = b[j]
2025-07-01 05:44:45.118 cruncher.set_seq2(bj)
2025-07-01 05:44:45.124 for i in range(alo, ahi):
2025-07-01 05:44:45.131 ai = a[i]
2025-07-01 05:44:45.137 if ai == bj:
2025-07-01 05:44:45.143 if eqi is None:
2025-07-01 05:44:45.151 eqi, eqj = i, j
2025-07-01 05:44:45.159 continue
2025-07-01 05:44:45.164 cruncher.set_seq1(ai)
2025-07-01 05:44:45.169 # computing similarity is expensive, so use the quick
2025-07-01 05:44:45.175 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:45.181 # compares by a factor of 3.
2025-07-01 05:44:45.187 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:45.193 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:45.200 # of the computation is cached by cruncher
2025-07-01 05:44:45.206 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:45.213 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:45.219 cruncher.ratio() > best_ratio:
2025-07-01 05:44:45.225 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:45.231 if best_ratio < cutoff:
2025-07-01 05:44:45.237 # no non-identical "pretty close" pair
2025-07-01 05:44:45.242 if eqi is None:
2025-07-01 05:44:45.249 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:45.255 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:45.261 return
2025-07-01 05:44:45.267 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:45.273 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:45.280 else:
2025-07-01 05:44:45.288 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:45.295 eqi = None
2025-07-01 05:44:45.301
2025-07-01 05:44:45.308 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:45.314 # identical
2025-07-01 05:44:45.321
2025-07-01 05:44:45.328 # pump out diffs from before the synch point
2025-07-01 05:44:45.335 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:45.342
2025-07-01 05:44:45.349 # do intraline marking on the synch pair
2025-07-01 05:44:45.359 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:45.369 if eqi is None:
2025-07-01 05:44:45.376 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:45.382 atags = btags = ""
2025-07-01 05:44:45.388 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:45.394 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:45.400 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:45.406 if tag == 'replace':
2025-07-01 05:44:45.412 atags += '^' * la
2025-07-01 05:44:45.418 btags += '^' * lb
2025-07-01 05:44:45.424 elif tag == 'delete':
2025-07-01 05:44:45.430 atags += '-' * la
2025-07-01 05:44:45.436 elif tag == 'insert':
2025-07-01 05:44:45.442 btags += '+' * lb
2025-07-01 05:44:45.448 elif tag == 'equal':
2025-07-01 05:44:45.454 atags += ' ' * la
2025-07-01 05:44:45.460 btags += ' ' * lb
2025-07-01 05:44:45.466 else:
2025-07-01 05:44:45.471 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:45.477 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:45.483 else:
2025-07-01 05:44:45.489 # the synch pair is identical
2025-07-01 05:44:45.495 yield ' ' + aelt
2025-07-01 05:44:45.501
2025-07-01 05:44:45.507 # pump out diffs from after the synch point
2025-07-01 05:44:45.513 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:45.520
2025-07-01 05:44:45.527 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:45.533 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:45.538
2025-07-01 05:44:45.544 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:45.551 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:45.557 alo = 45, ahi = 1101
2025-07-01 05:44:45.570 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:45.580 blo = 45, bhi = 1101
2025-07-01 05:44:45.588
2025-07-01 05:44:45.597 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:45.606 g = []
2025-07-01 05:44:45.616 if alo < ahi:
2025-07-01 05:44:45.626 if blo < bhi:
2025-07-01 05:44:45.635 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:45.643 else:
2025-07-01 05:44:45.650 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:45.657 elif blo < bhi:
2025-07-01 05:44:45.663 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:45.668
2025-07-01 05:44:45.674 > yield from g
2025-07-01 05:44:45.684
2025-07-01 05:44:45.694 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:45.703 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:45.710
2025-07-01 05:44:45.720 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:45.726 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:45.731 alo = 45, ahi = 1101
2025-07-01 05:44:45.741 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:45.751 blo = 45, bhi = 1101
2025-07-01 05:44:45.759
2025-07-01 05:44:45.771 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:45.779 r"""
2025-07-01 05:44:45.789 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:45.797 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:45.808 synch point, and intraline difference marking is done on the
2025-07-01 05:44:45.817 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:45.824
2025-07-01 05:44:45.831 Example:
2025-07-01 05:44:45.840
2025-07-01 05:44:45.851 >>> d = Differ()
2025-07-01 05:44:45.860 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:45.867 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:45.874 >>> print(''.join(results), end="")
2025-07-01 05:44:45.884 - abcDefghiJkl
2025-07-01 05:44:45.907 + abcdefGhijkl
2025-07-01 05:44:45.931 """
2025-07-01 05:44:45.940
2025-07-01 05:44:45.948 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:45.955 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:45.962 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:45.968 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:45.974 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:45.979
2025-07-01 05:44:45.987 # search for the pair that matches best without being identical
2025-07-01 05:44:45.997 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:46.006 # on junk -- unless we have to)
2025-07-01 05:44:46.014 for j in range(blo, bhi):
2025-07-01 05:44:46.022 bj = b[j]
2025-07-01 05:44:46.028 cruncher.set_seq2(bj)
2025-07-01 05:44:46.035 for i in range(alo, ahi):
2025-07-01 05:44:46.040 ai = a[i]
2025-07-01 05:44:46.046 if ai == bj:
2025-07-01 05:44:46.052 if eqi is None:
2025-07-01 05:44:46.058 eqi, eqj = i, j
2025-07-01 05:44:46.064 continue
2025-07-01 05:44:46.070 cruncher.set_seq1(ai)
2025-07-01 05:44:46.075 # computing similarity is expensive, so use the quick
2025-07-01 05:44:46.081 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:46.087 # compares by a factor of 3.
2025-07-01 05:44:46.093 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:46.099 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:46.105 # of the computation is cached by cruncher
2025-07-01 05:44:46.111 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:46.116 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:46.122 cruncher.ratio() > best_ratio:
2025-07-01 05:44:46.129 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:46.135 if best_ratio < cutoff:
2025-07-01 05:44:46.141 # no non-identical "pretty close" pair
2025-07-01 05:44:46.148 if eqi is None:
2025-07-01 05:44:46.155 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:46.163 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:46.169 return
2025-07-01 05:44:46.176 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:46.189 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:46.200 else:
2025-07-01 05:44:46.212 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:46.222 eqi = None
2025-07-01 05:44:46.228
2025-07-01 05:44:46.234 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:46.240 # identical
2025-07-01 05:44:46.246
2025-07-01 05:44:46.253 # pump out diffs from before the synch point
2025-07-01 05:44:46.260 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:46.272
2025-07-01 05:44:46.281 # do intraline marking on the synch pair
2025-07-01 05:44:46.288 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:46.295 if eqi is None:
2025-07-01 05:44:46.299 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:46.304 atags = btags = ""
2025-07-01 05:44:46.309 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:46.314 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:46.318 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:46.323 if tag == 'replace':
2025-07-01 05:44:46.328 atags += '^' * la
2025-07-01 05:44:46.333 btags += '^' * lb
2025-07-01 05:44:46.338 elif tag == 'delete':
2025-07-01 05:44:46.344 atags += '-' * la
2025-07-01 05:44:46.349 elif tag == 'insert':
2025-07-01 05:44:46.355 btags += '+' * lb
2025-07-01 05:44:46.363 elif tag == 'equal':
2025-07-01 05:44:46.371 atags += ' ' * la
2025-07-01 05:44:46.377 btags += ' ' * lb
2025-07-01 05:44:46.383 else:
2025-07-01 05:44:46.389 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:46.394 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:46.399 else:
2025-07-01 05:44:46.408 # the synch pair is identical
2025-07-01 05:44:46.417 yield ' ' + aelt
2025-07-01 05:44:46.423
2025-07-01 05:44:46.429 # pump out diffs from after the synch point
2025-07-01 05:44:46.436 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:46.443
2025-07-01 05:44:46.449 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:46.456 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:46.463
2025-07-01 05:44:46.470 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:46.478 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:46.487 alo = 48, ahi = 1101
2025-07-01 05:44:46.495 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:46.502 blo = 48, bhi = 1101
2025-07-01 05:44:46.507
2025-07-01 05:44:46.512 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:46.517 g = []
2025-07-01 05:44:46.522 if alo < ahi:
2025-07-01 05:44:46.527 if blo < bhi:
2025-07-01 05:44:46.532 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:46.536 else:
2025-07-01 05:44:46.547 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:46.554 elif blo < bhi:
2025-07-01 05:44:46.560 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:46.567
2025-07-01 05:44:46.575 > yield from g
2025-07-01 05:44:46.586
2025-07-01 05:44:46.594 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:46.601 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:46.607
2025-07-01 05:44:46.613 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:46.620 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:46.626 alo = 48, ahi = 1101
2025-07-01 05:44:46.633 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:46.639 blo = 48, bhi = 1101
2025-07-01 05:44:46.647
2025-07-01 05:44:46.657 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:46.662 r"""
2025-07-01 05:44:46.668 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:46.675 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:46.680 synch point, and intraline difference marking is done on the
2025-07-01 05:44:46.685 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:46.689
2025-07-01 05:44:46.695 Example:
2025-07-01 05:44:46.701
2025-07-01 05:44:46.707 >>> d = Differ()
2025-07-01 05:44:46.714 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:46.723 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:46.732 >>> print(''.join(results), end="")
2025-07-01 05:44:46.739 - abcDefghiJkl
2025-07-01 05:44:46.750 + abcdefGhijkl
2025-07-01 05:44:46.762 """
2025-07-01 05:44:46.769
2025-07-01 05:44:46.774 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:46.780 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:46.786 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:46.793 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:46.800 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:46.806
2025-07-01 05:44:46.817 # search for the pair that matches best without being identical
2025-07-01 05:44:46.827 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:46.834 # on junk -- unless we have to)
2025-07-01 05:44:46.840 for j in range(blo, bhi):
2025-07-01 05:44:46.845 bj = b[j]
2025-07-01 05:44:46.851 cruncher.set_seq2(bj)
2025-07-01 05:44:46.858 for i in range(alo, ahi):
2025-07-01 05:44:46.865 ai = a[i]
2025-07-01 05:44:46.871 if ai == bj:
2025-07-01 05:44:46.878 if eqi is None:
2025-07-01 05:44:46.884 eqi, eqj = i, j
2025-07-01 05:44:46.891 continue
2025-07-01 05:44:46.898 cruncher.set_seq1(ai)
2025-07-01 05:44:46.905 # computing similarity is expensive, so use the quick
2025-07-01 05:44:46.912 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:46.918 # compares by a factor of 3.
2025-07-01 05:44:46.925 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:46.932 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:46.938 # of the computation is cached by cruncher
2025-07-01 05:44:46.944 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:46.950 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:46.960 cruncher.ratio() > best_ratio:
2025-07-01 05:44:46.970 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:46.977 if best_ratio < cutoff:
2025-07-01 05:44:46.984 # no non-identical "pretty close" pair
2025-07-01 05:44:46.989 if eqi is None:
2025-07-01 05:44:46.995 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:47.001 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:47.007 return
2025-07-01 05:44:47.013 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:47.020 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:47.026 else:
2025-07-01 05:44:47.033 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:47.040 eqi = None
2025-07-01 05:44:47.046
2025-07-01 05:44:47.054 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:47.062 # identical
2025-07-01 05:44:47.068
2025-07-01 05:44:47.075 # pump out diffs from before the synch point
2025-07-01 05:44:47.080 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:47.086
2025-07-01 05:44:47.095 # do intraline marking on the synch pair
2025-07-01 05:44:47.102 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:47.108 if eqi is None:
2025-07-01 05:44:47.114 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:47.123 atags = btags = ""
2025-07-01 05:44:47.131 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:47.138 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:47.144 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:47.149 if tag == 'replace':
2025-07-01 05:44:47.161 atags += '^' * la
2025-07-01 05:44:47.172 btags += '^' * lb
2025-07-01 05:44:47.179 elif tag == 'delete':
2025-07-01 05:44:47.189 atags += '-' * la
2025-07-01 05:44:47.196 elif tag == 'insert':
2025-07-01 05:44:47.202 btags += '+' * lb
2025-07-01 05:44:47.207 elif tag == 'equal':
2025-07-01 05:44:47.212 atags += ' ' * la
2025-07-01 05:44:47.218 btags += ' ' * lb
2025-07-01 05:44:47.223 else:
2025-07-01 05:44:47.229 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:47.235 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:47.242 else:
2025-07-01 05:44:47.251 # the synch pair is identical
2025-07-01 05:44:47.259 yield ' ' + aelt
2025-07-01 05:44:47.265
2025-07-01 05:44:47.271 # pump out diffs from after the synch point
2025-07-01 05:44:47.277 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:47.282
2025-07-01 05:44:47.289 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:47.296 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:47.302
2025-07-01 05:44:47.309 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:47.316 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:47.328 alo = 49, ahi = 1101
2025-07-01 05:44:47.339 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:47.349 blo = 49, bhi = 1101
2025-07-01 05:44:47.356
2025-07-01 05:44:47.362 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:47.367 g = []
2025-07-01 05:44:47.372 if alo < ahi:
2025-07-01 05:44:47.377 if blo < bhi:
2025-07-01 05:44:47.382 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:47.389 else:
2025-07-01 05:44:47.395 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:47.402 elif blo < bhi:
2025-07-01 05:44:47.408 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:47.413
2025-07-01 05:44:47.420 > yield from g
2025-07-01 05:44:47.427
2025-07-01 05:44:47.436 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:47.450 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:47.459
2025-07-01 05:44:47.467 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:47.476 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:47.482 alo = 49, ahi = 1101
2025-07-01 05:44:47.490 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:47.496 blo = 49, bhi = 1101
2025-07-01 05:44:47.501
2025-07-01 05:44:47.507 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:47.513 r"""
2025-07-01 05:44:47.519 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:47.524 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:47.529 synch point, and intraline difference marking is done on the
2025-07-01 05:44:47.534 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:47.544
2025-07-01 05:44:47.550 Example:
2025-07-01 05:44:47.556
2025-07-01 05:44:47.561 >>> d = Differ()
2025-07-01 05:44:47.567 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:47.578 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:47.588 >>> print(''.join(results), end="")
2025-07-01 05:44:47.596 - abcDefghiJkl
2025-07-01 05:44:47.619 + abcdefGhijkl
2025-07-01 05:44:47.640 """
2025-07-01 05:44:47.650
2025-07-01 05:44:47.658 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:47.666 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:47.679 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:47.691 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:47.702 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:47.709
2025-07-01 05:44:47.716 # search for the pair that matches best without being identical
2025-07-01 05:44:47.722 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:47.728 # on junk -- unless we have to)
2025-07-01 05:44:47.734 for j in range(blo, bhi):
2025-07-01 05:44:47.744 bj = b[j]
2025-07-01 05:44:47.754 cruncher.set_seq2(bj)
2025-07-01 05:44:47.762 for i in range(alo, ahi):
2025-07-01 05:44:47.768 ai = a[i]
2025-07-01 05:44:47.774 if ai == bj:
2025-07-01 05:44:47.781 if eqi is None:
2025-07-01 05:44:47.787 eqi, eqj = i, j
2025-07-01 05:44:47.793 continue
2025-07-01 05:44:47.799 cruncher.set_seq1(ai)
2025-07-01 05:44:47.803 # computing similarity is expensive, so use the quick
2025-07-01 05:44:47.810 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:47.816 # compares by a factor of 3.
2025-07-01 05:44:47.820 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:47.826 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:47.834 # of the computation is cached by cruncher
2025-07-01 05:44:47.842 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:47.850 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:47.857 cruncher.ratio() > best_ratio:
2025-07-01 05:44:47.863 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:47.868 if best_ratio < cutoff:
2025-07-01 05:44:47.874 # no non-identical "pretty close" pair
2025-07-01 05:44:47.879 if eqi is None:
2025-07-01 05:44:47.885 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:47.892 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:47.899 return
2025-07-01 05:44:47.905 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:47.916 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:47.925 else:
2025-07-01 05:44:47.933 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:47.938 eqi = None
2025-07-01 05:44:47.944
2025-07-01 05:44:47.951 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:47.958 # identical
2025-07-01 05:44:47.968
2025-07-01 05:44:47.976 # pump out diffs from before the synch point
2025-07-01 05:44:47.982 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:47.988
2025-07-01 05:44:47.995 # do intraline marking on the synch pair
2025-07-01 05:44:48.001 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:48.008 if eqi is None:
2025-07-01 05:44:48.015 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:48.023 atags = btags = ""
2025-07-01 05:44:48.031 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:48.042 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:48.053 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:48.062 if tag == 'replace':
2025-07-01 05:44:48.069 atags += '^' * la
2025-07-01 05:44:48.075 btags += '^' * lb
2025-07-01 05:44:48.083 elif tag == 'delete':
2025-07-01 05:44:48.091 atags += '-' * la
2025-07-01 05:44:48.101 elif tag == 'insert':
2025-07-01 05:44:48.109 btags += '+' * lb
2025-07-01 05:44:48.120 elif tag == 'equal':
2025-07-01 05:44:48.128 atags += ' ' * la
2025-07-01 05:44:48.136 btags += ' ' * lb
2025-07-01 05:44:48.143 else:
2025-07-01 05:44:48.150 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:48.156 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:48.163 else:
2025-07-01 05:44:48.169 # the synch pair is identical
2025-07-01 05:44:48.176 yield ' ' + aelt
2025-07-01 05:44:48.183
2025-07-01 05:44:48.190 # pump out diffs from after the synch point
2025-07-01 05:44:48.199 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:48.209
2025-07-01 05:44:48.218 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:48.224 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:48.229
2025-07-01 05:44:48.235 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:48.242 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:48.248 alo = 50, ahi = 1101
2025-07-01 05:44:48.256 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:48.263 blo = 50, bhi = 1101
2025-07-01 05:44:48.272
2025-07-01 05:44:48.280 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:48.287 g = []
2025-07-01 05:44:48.297 if alo < ahi:
2025-07-01 05:44:48.307 if blo < bhi:
2025-07-01 05:44:48.313 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:48.319 else:
2025-07-01 05:44:48.324 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:48.328 elif blo < bhi:
2025-07-01 05:44:48.334 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:48.339
2025-07-01 05:44:48.346 > yield from g
2025-07-01 05:44:48.352
2025-07-01 05:44:48.359 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:48.367 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:48.374
2025-07-01 05:44:48.382 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:48.390 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:48.399 alo = 50, ahi = 1101
2025-07-01 05:44:48.412 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:48.420 blo = 50, bhi = 1101
2025-07-01 05:44:48.426
2025-07-01 05:44:48.438 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:48.447 r"""
2025-07-01 05:44:48.455 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:48.461 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:48.468 synch point, and intraline difference marking is done on the
2025-07-01 05:44:48.475 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:48.482
2025-07-01 05:44:48.490 Example:
2025-07-01 05:44:48.501
2025-07-01 05:44:48.509 >>> d = Differ()
2025-07-01 05:44:48.515 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:48.521 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:48.528 >>> print(''.join(results), end="")
2025-07-01 05:44:48.535 - abcDefghiJkl
2025-07-01 05:44:48.553 + abcdefGhijkl
2025-07-01 05:44:48.567 """
2025-07-01 05:44:48.573
2025-07-01 05:44:48.579 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:48.585 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:48.591 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:48.598 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:48.605 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:48.612
2025-07-01 05:44:48.623 # search for the pair that matches best without being identical
2025-07-01 05:44:48.631 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:48.637 # on junk -- unless we have to)
2025-07-01 05:44:48.643 for j in range(blo, bhi):
2025-07-01 05:44:48.650 bj = b[j]
2025-07-01 05:44:48.658 cruncher.set_seq2(bj)
2025-07-01 05:44:48.666 for i in range(alo, ahi):
2025-07-01 05:44:48.673 ai = a[i]
2025-07-01 05:44:48.679 if ai == bj:
2025-07-01 05:44:48.685 if eqi is None:
2025-07-01 05:44:48.691 eqi, eqj = i, j
2025-07-01 05:44:48.697 continue
2025-07-01 05:44:48.702 cruncher.set_seq1(ai)
2025-07-01 05:44:48.706 # computing similarity is expensive, so use the quick
2025-07-01 05:44:48.711 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:48.716 # compares by a factor of 3.
2025-07-01 05:44:48.720 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:48.725 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:48.731 # of the computation is cached by cruncher
2025-07-01 05:44:48.737 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:48.748 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:48.758 cruncher.ratio() > best_ratio:
2025-07-01 05:44:48.766 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:48.774 if best_ratio < cutoff:
2025-07-01 05:44:48.785 # no non-identical "pretty close" pair
2025-07-01 05:44:48.795 if eqi is None:
2025-07-01 05:44:48.803 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:48.810 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:48.816 return
2025-07-01 05:44:48.825 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:48.837 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:48.847 else:
2025-07-01 05:44:48.856 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:48.864 eqi = None
2025-07-01 05:44:48.878
2025-07-01 05:44:48.889 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:48.899 # identical
2025-07-01 05:44:48.910
2025-07-01 05:44:48.920 # pump out diffs from before the synch point
2025-07-01 05:44:48.927 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:48.934
2025-07-01 05:44:48.940 # do intraline marking on the synch pair
2025-07-01 05:44:48.946 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:48.952 if eqi is None:
2025-07-01 05:44:48.958 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:48.967 atags = btags = ""
2025-07-01 05:44:48.979 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:48.988 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:48.997 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:49.003 if tag == 'replace':
2025-07-01 05:44:49.011 atags += '^' * la
2025-07-01 05:44:49.021 btags += '^' * lb
2025-07-01 05:44:49.029 elif tag == 'delete':
2025-07-01 05:44:49.036 atags += '-' * la
2025-07-01 05:44:49.042 elif tag == 'insert':
2025-07-01 05:44:49.051 btags += '+' * lb
2025-07-01 05:44:49.061 elif tag == 'equal':
2025-07-01 05:44:49.068 atags += ' ' * la
2025-07-01 05:44:49.074 btags += ' ' * lb
2025-07-01 05:44:49.086 else:
2025-07-01 05:44:49.096 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:49.105 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:49.118 else:
2025-07-01 05:44:49.126 # the synch pair is identical
2025-07-01 05:44:49.132 yield ' ' + aelt
2025-07-01 05:44:49.138
2025-07-01 05:44:49.146 # pump out diffs from after the synch point
2025-07-01 05:44:49.158 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:49.167
2025-07-01 05:44:49.174 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:49.181 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:49.186
2025-07-01 05:44:49.192 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:49.204 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:49.213 alo = 51, ahi = 1101
2025-07-01 05:44:49.225 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:49.232 blo = 51, bhi = 1101
2025-07-01 05:44:49.238
2025-07-01 05:44:49.245 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:49.252 g = []
2025-07-01 05:44:49.259 if alo < ahi:
2025-07-01 05:44:49.268 if blo < bhi:
2025-07-01 05:44:49.279 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:49.285 else:
2025-07-01 05:44:49.295 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:49.302 elif blo < bhi:
2025-07-01 05:44:49.308 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:49.314
2025-07-01 05:44:49.320 > yield from g
2025-07-01 05:44:49.326
2025-07-01 05:44:49.332 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:49.338 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:49.344
2025-07-01 05:44:49.350 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:49.357 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:49.362 alo = 51, ahi = 1101
2025-07-01 05:44:49.372 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:49.383 blo = 51, bhi = 1101
2025-07-01 05:44:49.390
2025-07-01 05:44:49.396 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:49.402 r"""
2025-07-01 05:44:49.412 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:49.423 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:49.431 synch point, and intraline difference marking is done on the
2025-07-01 05:44:49.437 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:49.447
2025-07-01 05:44:49.457 Example:
2025-07-01 05:44:49.465
2025-07-01 05:44:49.471 >>> d = Differ()
2025-07-01 05:44:49.475 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:49.480 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:49.484 >>> print(''.join(results), end="")
2025-07-01 05:44:49.489 - abcDefghiJkl
2025-07-01 05:44:49.498 + abcdefGhijkl
2025-07-01 05:44:49.509 """
2025-07-01 05:44:49.514
2025-07-01 05:44:49.522 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:49.533 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:49.543 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:49.553 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:49.565 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:49.574
2025-07-01 05:44:49.581 # search for the pair that matches best without being identical
2025-07-01 05:44:49.588 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:49.596 # on junk -- unless we have to)
2025-07-01 05:44:49.603 for j in range(blo, bhi):
2025-07-01 05:44:49.610 bj = b[j]
2025-07-01 05:44:49.620 cruncher.set_seq2(bj)
2025-07-01 05:44:49.629 for i in range(alo, ahi):
2025-07-01 05:44:49.635 ai = a[i]
2025-07-01 05:44:49.641 if ai == bj:
2025-07-01 05:44:49.646 if eqi is None:
2025-07-01 05:44:49.651 eqi, eqj = i, j
2025-07-01 05:44:49.655 continue
2025-07-01 05:44:49.660 cruncher.set_seq1(ai)
2025-07-01 05:44:49.666 # computing similarity is expensive, so use the quick
2025-07-01 05:44:49.673 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:49.683 # compares by a factor of 3.
2025-07-01 05:44:49.689 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:49.695 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:49.703 # of the computation is cached by cruncher
2025-07-01 05:44:49.711 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:49.717 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:49.724 cruncher.ratio() > best_ratio:
2025-07-01 05:44:49.730 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:49.736 if best_ratio < cutoff:
2025-07-01 05:44:49.742 # no non-identical "pretty close" pair
2025-07-01 05:44:49.747 if eqi is None:
2025-07-01 05:44:49.753 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:49.758 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:49.765 return
2025-07-01 05:44:49.772 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:49.780 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:49.786 else:
2025-07-01 05:44:49.801 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:49.813 eqi = None
2025-07-01 05:44:49.823
2025-07-01 05:44:49.831 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:49.839 # identical
2025-07-01 05:44:49.853
2025-07-01 05:44:49.863 # pump out diffs from before the synch point
2025-07-01 05:44:49.870 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:49.876
2025-07-01 05:44:49.882 # do intraline marking on the synch pair
2025-07-01 05:44:49.887 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:49.892 if eqi is None:
2025-07-01 05:44:49.897 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:49.903 atags = btags = ""
2025-07-01 05:44:49.909 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:49.921 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:49.932 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:49.940 if tag == 'replace':
2025-07-01 05:44:49.948 atags += '^' * la
2025-07-01 05:44:49.954 btags += '^' * lb
2025-07-01 05:44:49.961 elif tag == 'delete':
2025-07-01 05:44:49.972 atags += '-' * la
2025-07-01 05:44:49.982 elif tag == 'insert':
2025-07-01 05:44:49.990 btags += '+' * lb
2025-07-01 05:44:49.997 elif tag == 'equal':
2025-07-01 05:44:50.003 atags += ' ' * la
2025-07-01 05:44:50.015 btags += ' ' * lb
2025-07-01 05:44:50.026 else:
2025-07-01 05:44:50.032 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:50.039 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:50.044 else:
2025-07-01 05:44:50.051 # the synch pair is identical
2025-07-01 05:44:50.063 yield ' ' + aelt
2025-07-01 05:44:50.073
2025-07-01 05:44:50.080 # pump out diffs from after the synch point
2025-07-01 05:44:50.090 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:50.099
2025-07-01 05:44:50.111 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:50.120 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:50.127
2025-07-01 05:44:50.135 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:50.147 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:50.156 alo = 52, ahi = 1101
2025-07-01 05:44:50.168 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:50.178 blo = 52, bhi = 1101
2025-07-01 05:44:50.186
2025-07-01 05:44:50.197 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:50.206 g = []
2025-07-01 05:44:50.216 if alo < ahi:
2025-07-01 05:44:50.225 if blo < bhi:
2025-07-01 05:44:50.232 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:50.239 else:
2025-07-01 05:44:50.247 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:50.257 elif blo < bhi:
2025-07-01 05:44:50.265 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:50.275
2025-07-01 05:44:50.287 > yield from g
2025-07-01 05:44:50.297
2025-07-01 05:44:50.308 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:50.321 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:50.330
2025-07-01 05:44:50.339 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:50.351 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:50.361 alo = 52, ahi = 1101
2025-07-01 05:44:50.374 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:50.390 blo = 52, bhi = 1101
2025-07-01 05:44:50.402
2025-07-01 05:44:50.409 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:50.416 r"""
2025-07-01 05:44:50.422 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:50.427 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:50.432 synch point, and intraline difference marking is done on the
2025-07-01 05:44:50.438 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:50.443
2025-07-01 05:44:50.449 Example:
2025-07-01 05:44:50.454
2025-07-01 05:44:50.462 >>> d = Differ()
2025-07-01 05:44:50.469 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:50.475 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:50.481 >>> print(''.join(results), end="")
2025-07-01 05:44:50.487 - abcDefghiJkl
2025-07-01 05:44:50.505 + abcdefGhijkl
2025-07-01 05:44:50.520 """
2025-07-01 05:44:50.526
2025-07-01 05:44:50.531 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:50.536 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:50.541 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:50.546 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:50.553 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:50.560
2025-07-01 05:44:50.567 # search for the pair that matches best without being identical
2025-07-01 05:44:50.574 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:50.581 # on junk -- unless we have to)
2025-07-01 05:44:50.586 for j in range(blo, bhi):
2025-07-01 05:44:50.592 bj = b[j]
2025-07-01 05:44:50.597 cruncher.set_seq2(bj)
2025-07-01 05:44:50.602 for i in range(alo, ahi):
2025-07-01 05:44:50.607 ai = a[i]
2025-07-01 05:44:50.612 if ai == bj:
2025-07-01 05:44:50.618 if eqi is None:
2025-07-01 05:44:50.624 eqi, eqj = i, j
2025-07-01 05:44:50.630 continue
2025-07-01 05:44:50.636 cruncher.set_seq1(ai)
2025-07-01 05:44:50.642 # computing similarity is expensive, so use the quick
2025-07-01 05:44:50.651 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:50.660 # compares by a factor of 3.
2025-07-01 05:44:50.671 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:50.683 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:50.693 # of the computation is cached by cruncher
2025-07-01 05:44:50.703 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:50.712 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:50.719 cruncher.ratio() > best_ratio:
2025-07-01 05:44:50.726 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:50.733 if best_ratio < cutoff:
2025-07-01 05:44:50.739 # no non-identical "pretty close" pair
2025-07-01 05:44:50.745 if eqi is None:
2025-07-01 05:44:50.752 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:50.759 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:50.764 return
2025-07-01 05:44:50.770 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:50.775 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:50.781 else:
2025-07-01 05:44:50.787 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:50.794 eqi = None
2025-07-01 05:44:50.801
2025-07-01 05:44:50.807 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:50.812 # identical
2025-07-01 05:44:50.817
2025-07-01 05:44:50.823 # pump out diffs from before the synch point
2025-07-01 05:44:50.829 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:50.835
2025-07-01 05:44:50.842 # do intraline marking on the synch pair
2025-07-01 05:44:50.849 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:50.855 if eqi is None:
2025-07-01 05:44:50.860 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:50.865 atags = btags = ""
2025-07-01 05:44:50.871 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:50.877 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:50.884 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:50.890 if tag == 'replace':
2025-07-01 05:44:50.896 atags += '^' * la
2025-07-01 05:44:50.902 btags += '^' * lb
2025-07-01 05:44:50.906 elif tag == 'delete':
2025-07-01 05:44:50.911 atags += '-' * la
2025-07-01 05:44:50.916 elif tag == 'insert':
2025-07-01 05:44:50.921 btags += '+' * lb
2025-07-01 05:44:50.927 elif tag == 'equal':
2025-07-01 05:44:50.933 atags += ' ' * la
2025-07-01 05:44:50.940 btags += ' ' * lb
2025-07-01 05:44:50.946 else:
2025-07-01 05:44:50.953 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:50.957 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:50.962 else:
2025-07-01 05:44:50.967 # the synch pair is identical
2025-07-01 05:44:50.972 yield ' ' + aelt
2025-07-01 05:44:50.978
2025-07-01 05:44:50.983 # pump out diffs from after the synch point
2025-07-01 05:44:50.990 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:50.995
2025-07-01 05:44:51.001 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:51.008 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:51.015
2025-07-01 05:44:51.022 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:51.030 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:51.037 alo = 53, ahi = 1101
2025-07-01 05:44:51.046 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:51.057 blo = 53, bhi = 1101
2025-07-01 05:44:51.068
2025-07-01 05:44:51.077 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:51.083 g = []
2025-07-01 05:44:51.090 if alo < ahi:
2025-07-01 05:44:51.097 if blo < bhi:
2025-07-01 05:44:51.103 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:51.111 else:
2025-07-01 05:44:51.122 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:51.131 elif blo < bhi:
2025-07-01 05:44:51.138 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:51.144
2025-07-01 05:44:51.151 > yield from g
2025-07-01 05:44:51.161
2025-07-01 05:44:51.168 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:51.181 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:51.190
2025-07-01 05:44:51.196 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:51.202 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:51.208 alo = 53, ahi = 1101
2025-07-01 05:44:51.215 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:51.222 blo = 53, bhi = 1101
2025-07-01 05:44:51.231
2025-07-01 05:44:51.243 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:51.253 r"""
2025-07-01 05:44:51.262 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:51.269 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:51.276 synch point, and intraline difference marking is done on the
2025-07-01 05:44:51.284 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:51.290
2025-07-01 05:44:51.295 Example:
2025-07-01 05:44:51.300
2025-07-01 05:44:51.304 >>> d = Differ()
2025-07-01 05:44:51.309 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:51.315 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:51.320 >>> print(''.join(results), end="")
2025-07-01 05:44:51.326 - abcDefghiJkl
2025-07-01 05:44:51.338 + abcdefGhijkl
2025-07-01 05:44:51.354 """
2025-07-01 05:44:51.361
2025-07-01 05:44:51.367 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:51.372 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:51.378 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:51.384 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:51.390 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:51.399
2025-07-01 05:44:51.410 # search for the pair that matches best without being identical
2025-07-01 05:44:51.419 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:51.427 # on junk -- unless we have to)
2025-07-01 05:44:51.433 for j in range(blo, bhi):
2025-07-01 05:44:51.438 bj = b[j]
2025-07-01 05:44:51.442 cruncher.set_seq2(bj)
2025-07-01 05:44:51.447 for i in range(alo, ahi):
2025-07-01 05:44:51.453 ai = a[i]
2025-07-01 05:44:51.458 if ai == bj:
2025-07-01 05:44:51.464 if eqi is None:
2025-07-01 05:44:51.469 eqi, eqj = i, j
2025-07-01 05:44:51.475 continue
2025-07-01 05:44:51.482 cruncher.set_seq1(ai)
2025-07-01 05:44:51.490 # computing similarity is expensive, so use the quick
2025-07-01 05:44:51.499 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:51.506 # compares by a factor of 3.
2025-07-01 05:44:51.512 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:51.518 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:51.526 # of the computation is cached by cruncher
2025-07-01 05:44:51.534 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:51.542 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:51.549 cruncher.ratio() > best_ratio:
2025-07-01 05:44:51.556 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:51.563 if best_ratio < cutoff:
2025-07-01 05:44:51.570 # no non-identical "pretty close" pair
2025-07-01 05:44:51.577 if eqi is None:
2025-07-01 05:44:51.584 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:51.591 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:51.598 return
2025-07-01 05:44:51.605 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:51.612 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:51.618 else:
2025-07-01 05:44:51.627 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:51.638 eqi = None
2025-07-01 05:44:51.646
2025-07-01 05:44:51.652 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:51.657 # identical
2025-07-01 05:44:51.663
2025-07-01 05:44:51.671 # pump out diffs from before the synch point
2025-07-01 05:44:51.682 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:51.691
2025-07-01 05:44:51.697 # do intraline marking on the synch pair
2025-07-01 05:44:51.702 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:51.708 if eqi is None:
2025-07-01 05:44:51.713 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:51.719 atags = btags = ""
2025-07-01 05:44:51.730 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:51.741 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:51.753 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:51.764 if tag == 'replace':
2025-07-01 05:44:51.772 atags += '^' * la
2025-07-01 05:44:51.778 btags += '^' * lb
2025-07-01 05:44:51.784 elif tag == 'delete':
2025-07-01 05:44:51.790 atags += '-' * la
2025-07-01 05:44:51.800 elif tag == 'insert':
2025-07-01 05:44:51.810 btags += '+' * lb
2025-07-01 05:44:51.820 elif tag == 'equal':
2025-07-01 05:44:51.831 atags += ' ' * la
2025-07-01 05:44:51.843 btags += ' ' * lb
2025-07-01 05:44:51.852 else:
2025-07-01 05:44:51.860 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:51.867 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:51.876 else:
2025-07-01 05:44:51.883 # the synch pair is identical
2025-07-01 05:44:51.890 yield ' ' + aelt
2025-07-01 05:44:51.896
2025-07-01 05:44:51.902 # pump out diffs from after the synch point
2025-07-01 05:44:51.908 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:51.914
2025-07-01 05:44:51.925 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:51.934 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:51.942
2025-07-01 05:44:51.951 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:51.960 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:51.968 alo = 54, ahi = 1101
2025-07-01 05:44:51.976 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:51.982 blo = 54, bhi = 1101
2025-07-01 05:44:51.992
2025-07-01 05:44:52.002 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:52.009 g = []
2025-07-01 05:44:52.016 if alo < ahi:
2025-07-01 05:44:52.022 if blo < bhi:
2025-07-01 05:44:52.028 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:52.034 else:
2025-07-01 05:44:52.045 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:52.055 elif blo < bhi:
2025-07-01 05:44:52.063 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:52.070
2025-07-01 05:44:52.080 > yield from g
2025-07-01 05:44:52.090
2025-07-01 05:44:52.098 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:52.106 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:52.113
2025-07-01 05:44:52.118 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:52.123 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:52.130 alo = 54, ahi = 1101
2025-07-01 05:44:52.135 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:52.139 blo = 54, bhi = 1101
2025-07-01 05:44:52.144
2025-07-01 05:44:52.149 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:52.154 r"""
2025-07-01 05:44:52.161 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:52.166 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:52.173 synch point, and intraline difference marking is done on the
2025-07-01 05:44:52.180 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:52.187
2025-07-01 05:44:52.194 Example:
2025-07-01 05:44:52.206
2025-07-01 05:44:52.215 >>> d = Differ()
2025-07-01 05:44:52.223 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:52.229 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:52.234 >>> print(''.join(results), end="")
2025-07-01 05:44:52.240 - abcDefghiJkl
2025-07-01 05:44:52.250 + abcdefGhijkl
2025-07-01 05:44:52.259 """
2025-07-01 05:44:52.263
2025-07-01 05:44:52.268 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:52.273 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:52.277 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:52.282 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:52.287 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:52.292
2025-07-01 05:44:52.296 # search for the pair that matches best without being identical
2025-07-01 05:44:52.301 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:52.306 # on junk -- unless we have to)
2025-07-01 05:44:52.311 for j in range(blo, bhi):
2025-07-01 05:44:52.316 bj = b[j]
2025-07-01 05:44:52.321 cruncher.set_seq2(bj)
2025-07-01 05:44:52.326 for i in range(alo, ahi):
2025-07-01 05:44:52.331 ai = a[i]
2025-07-01 05:44:52.337 if ai == bj:
2025-07-01 05:44:52.341 if eqi is None:
2025-07-01 05:44:52.346 eqi, eqj = i, j
2025-07-01 05:44:52.351 continue
2025-07-01 05:44:52.356 cruncher.set_seq1(ai)
2025-07-01 05:44:52.362 # computing similarity is expensive, so use the quick
2025-07-01 05:44:52.367 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:52.373 # compares by a factor of 3.
2025-07-01 05:44:52.382 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:52.389 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:52.394 # of the computation is cached by cruncher
2025-07-01 05:44:52.401 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:52.408 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:52.415 cruncher.ratio() > best_ratio:
2025-07-01 05:44:52.422 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:52.429 if best_ratio < cutoff:
2025-07-01 05:44:52.436 # no non-identical "pretty close" pair
2025-07-01 05:44:52.443 if eqi is None:
2025-07-01 05:44:52.450 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:52.464 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:52.472 return
2025-07-01 05:44:52.484 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:52.494 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:52.505 else:
2025-07-01 05:44:52.516 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:52.523 eqi = None
2025-07-01 05:44:52.530
2025-07-01 05:44:52.537 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:52.546 # identical
2025-07-01 05:44:52.559
2025-07-01 05:44:52.569 # pump out diffs from before the synch point
2025-07-01 05:44:52.581 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:52.592
2025-07-01 05:44:52.605 # do intraline marking on the synch pair
2025-07-01 05:44:52.619 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:52.630 if eqi is None:
2025-07-01 05:44:52.642 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:52.650 atags = btags = ""
2025-07-01 05:44:52.657 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:52.664 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:52.670 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:52.676 if tag == 'replace':
2025-07-01 05:44:52.681 atags += '^' * la
2025-07-01 05:44:52.687 btags += '^' * lb
2025-07-01 05:44:52.695 elif tag == 'delete':
2025-07-01 05:44:52.707 atags += '-' * la
2025-07-01 05:44:52.717 elif tag == 'insert':
2025-07-01 05:44:52.727 btags += '+' * lb
2025-07-01 05:44:52.739 elif tag == 'equal':
2025-07-01 05:44:52.753 atags += ' ' * la
2025-07-01 05:44:52.762 btags += ' ' * lb
2025-07-01 05:44:52.770 else:
2025-07-01 05:44:52.780 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:52.790 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:52.800 else:
2025-07-01 05:44:52.814 # the synch pair is identical
2025-07-01 05:44:52.823 yield ' ' + aelt
2025-07-01 05:44:52.830
2025-07-01 05:44:52.835 # pump out diffs from after the synch point
2025-07-01 05:44:52.839 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:52.844
2025-07-01 05:44:52.848 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:52.855 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:52.865
2025-07-01 05:44:52.872 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:52.880 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:52.887 alo = 55, ahi = 1101
2025-07-01 05:44:52.897 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:52.905 blo = 55, bhi = 1101
2025-07-01 05:44:52.913
2025-07-01 05:44:52.920 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:52.928 g = []
2025-07-01 05:44:52.934 if alo < ahi:
2025-07-01 05:44:52.942 if blo < bhi:
2025-07-01 05:44:52.949 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:52.955 else:
2025-07-01 05:44:52.962 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:52.966 elif blo < bhi:
2025-07-01 05:44:52.971 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:52.975
2025-07-01 05:44:52.980 > yield from g
2025-07-01 05:44:52.984
2025-07-01 05:44:52.988 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:52.993 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:53.001
2025-07-01 05:44:53.008 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:53.015 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:53.021 alo = 55, ahi = 1101
2025-07-01 05:44:53.027 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:53.033 blo = 55, bhi = 1101
2025-07-01 05:44:53.039
2025-07-01 05:44:53.047 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:53.055 r"""
2025-07-01 05:44:53.067 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:53.075 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:53.082 synch point, and intraline difference marking is done on the
2025-07-01 05:44:53.086 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:53.092
2025-07-01 05:44:53.097 Example:
2025-07-01 05:44:53.102
2025-07-01 05:44:53.113 >>> d = Differ()
2025-07-01 05:44:53.122 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:53.131 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:53.139 >>> print(''.join(results), end="")
2025-07-01 05:44:53.150 - abcDefghiJkl
2025-07-01 05:44:53.173 + abcdefGhijkl
2025-07-01 05:44:53.194 """
2025-07-01 05:44:53.204
2025-07-01 05:44:53.213 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:53.226 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:53.236 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:53.242 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:53.249 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:53.255
2025-07-01 05:44:53.262 # search for the pair that matches best without being identical
2025-07-01 05:44:53.273 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:53.285 # on junk -- unless we have to)
2025-07-01 05:44:53.292 for j in range(blo, bhi):
2025-07-01 05:44:53.300 bj = b[j]
2025-07-01 05:44:53.306 cruncher.set_seq2(bj)
2025-07-01 05:44:53.312 for i in range(alo, ahi):
2025-07-01 05:44:53.318 ai = a[i]
2025-07-01 05:44:53.324 if ai == bj:
2025-07-01 05:44:53.330 if eqi is None:
2025-07-01 05:44:53.336 eqi, eqj = i, j
2025-07-01 05:44:53.342 continue
2025-07-01 05:44:53.347 cruncher.set_seq1(ai)
2025-07-01 05:44:53.353 # computing similarity is expensive, so use the quick
2025-07-01 05:44:53.359 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:53.365 # compares by a factor of 3.
2025-07-01 05:44:53.370 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:53.380 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:53.386 # of the computation is cached by cruncher
2025-07-01 05:44:53.392 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:53.399 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:53.406 cruncher.ratio() > best_ratio:
2025-07-01 05:44:53.415 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:53.426 if best_ratio < cutoff:
2025-07-01 05:44:53.433 # no non-identical "pretty close" pair
2025-07-01 05:44:53.439 if eqi is None:
2025-07-01 05:44:53.447 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:53.454 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:53.463 return
2025-07-01 05:44:53.475 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:53.485 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:53.492 else:
2025-07-01 05:44:53.498 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:53.504 eqi = None
2025-07-01 05:44:53.509
2025-07-01 05:44:53.515 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:53.521 # identical
2025-07-01 05:44:53.527
2025-07-01 05:44:53.532 # pump out diffs from before the synch point
2025-07-01 05:44:53.538 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:53.543
2025-07-01 05:44:53.551 # do intraline marking on the synch pair
2025-07-01 05:44:53.561 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:53.570 if eqi is None:
2025-07-01 05:44:53.577 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:53.584 atags = btags = ""
2025-07-01 05:44:53.590 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:53.596 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:53.603 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:53.608 if tag == 'replace':
2025-07-01 05:44:53.614 atags += '^' * la
2025-07-01 05:44:53.624 btags += '^' * lb
2025-07-01 05:44:53.633 elif tag == 'delete':
2025-07-01 05:44:53.640 atags += '-' * la
2025-07-01 05:44:53.648 elif tag == 'insert':
2025-07-01 05:44:53.658 btags += '+' * lb
2025-07-01 05:44:53.665 elif tag == 'equal':
2025-07-01 05:44:53.672 atags += ' ' * la
2025-07-01 05:44:53.678 btags += ' ' * lb
2025-07-01 05:44:53.685 else:
2025-07-01 05:44:53.693 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:53.705 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:53.716 else:
2025-07-01 05:44:53.725 # the synch pair is identical
2025-07-01 05:44:53.732 yield ' ' + aelt
2025-07-01 05:44:53.738
2025-07-01 05:44:53.744 # pump out diffs from after the synch point
2025-07-01 05:44:53.750 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:53.755
2025-07-01 05:44:53.761 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:53.771 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:53.782
2025-07-01 05:44:53.792 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:53.805 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:53.817 alo = 56, ahi = 1101
2025-07-01 05:44:53.827 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:53.836 blo = 56, bhi = 1101
2025-07-01 05:44:53.843
2025-07-01 05:44:53.850 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:53.856 g = []
2025-07-01 05:44:53.862 if alo < ahi:
2025-07-01 05:44:53.868 if blo < bhi:
2025-07-01 05:44:53.874 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:53.884 else:
2025-07-01 05:44:53.893 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:53.900 elif blo < bhi:
2025-07-01 05:44:53.906 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:53.915
2025-07-01 05:44:53.925 > yield from g
2025-07-01 05:44:53.936
2025-07-01 05:44:53.944 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:53.954 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:53.967
2025-07-01 05:44:53.976 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:53.986 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:53.997 alo = 56, ahi = 1101
2025-07-01 05:44:54.006 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:54.015 blo = 56, bhi = 1101
2025-07-01 05:44:54.023
2025-07-01 05:44:54.029 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:54.035 r"""
2025-07-01 05:44:54.042 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:54.053 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:54.062 synch point, and intraline difference marking is done on the
2025-07-01 05:44:54.069 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:54.079
2025-07-01 05:44:54.090 Example:
2025-07-01 05:44:54.101
2025-07-01 05:44:54.109 >>> d = Differ()
2025-07-01 05:44:54.115 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:54.122 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:54.129 >>> print(''.join(results), end="")
2025-07-01 05:44:54.136 - abcDefghiJkl
2025-07-01 05:44:54.147 + abcdefGhijkl
2025-07-01 05:44:54.158 """
2025-07-01 05:44:54.164
2025-07-01 05:44:54.171 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:54.176 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:54.182 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:54.187 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:54.192 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:54.200
2025-07-01 05:44:54.206 # search for the pair that matches best without being identical
2025-07-01 05:44:54.214 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:54.221 # on junk -- unless we have to)
2025-07-01 05:44:54.227 for j in range(blo, bhi):
2025-07-01 05:44:54.234 bj = b[j]
2025-07-01 05:44:54.241 cruncher.set_seq2(bj)
2025-07-01 05:44:54.252 for i in range(alo, ahi):
2025-07-01 05:44:54.265 ai = a[i]
2025-07-01 05:44:54.275 if ai == bj:
2025-07-01 05:44:54.282 if eqi is None:
2025-07-01 05:44:54.291 eqi, eqj = i, j
2025-07-01 05:44:54.302 continue
2025-07-01 05:44:54.311 cruncher.set_seq1(ai)
2025-07-01 05:44:54.319 # computing similarity is expensive, so use the quick
2025-07-01 05:44:54.326 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:54.333 # compares by a factor of 3.
2025-07-01 05:44:54.339 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:54.346 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:54.356 # of the computation is cached by cruncher
2025-07-01 05:44:54.366 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:54.379 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:54.389 cruncher.ratio() > best_ratio:
2025-07-01 05:44:54.400 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:54.411 if best_ratio < cutoff:
2025-07-01 05:44:54.420 # no non-identical "pretty close" pair
2025-07-01 05:44:54.428 if eqi is None:
2025-07-01 05:44:54.439 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:54.449 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:54.456 return
2025-07-01 05:44:54.463 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:54.470 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:54.476 else:
2025-07-01 05:44:54.482 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:54.487 eqi = None
2025-07-01 05:44:54.492
2025-07-01 05:44:54.498 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:54.503 # identical
2025-07-01 05:44:54.507
2025-07-01 05:44:54.515 # pump out diffs from before the synch point
2025-07-01 05:44:54.524 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:54.531
2025-07-01 05:44:54.538 # do intraline marking on the synch pair
2025-07-01 05:44:54.548 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:54.556 if eqi is None:
2025-07-01 05:44:54.562 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:54.568 atags = btags = ""
2025-07-01 05:44:54.575 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:54.583 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:54.595 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:54.604 if tag == 'replace':
2025-07-01 05:44:54.612 atags += '^' * la
2025-07-01 05:44:54.619 btags += '^' * lb
2025-07-01 05:44:54.625 elif tag == 'delete':
2025-07-01 05:44:54.630 atags += '-' * la
2025-07-01 05:44:54.635 elif tag == 'insert':
2025-07-01 05:44:54.641 btags += '+' * lb
2025-07-01 05:44:54.646 elif tag == 'equal':
2025-07-01 05:44:54.651 atags += ' ' * la
2025-07-01 05:44:54.657 btags += ' ' * lb
2025-07-01 05:44:54.662 else:
2025-07-01 05:44:54.669 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:54.674 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:54.680 else:
2025-07-01 05:44:54.685 # the synch pair is identical
2025-07-01 05:44:54.691 yield ' ' + aelt
2025-07-01 05:44:54.698
2025-07-01 05:44:54.704 # pump out diffs from after the synch point
2025-07-01 05:44:54.709 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:54.715
2025-07-01 05:44:54.722 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:54.733 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:54.743
2025-07-01 05:44:54.755 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:54.764 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:54.770 alo = 57, ahi = 1101
2025-07-01 05:44:54.777 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:54.784 blo = 57, bhi = 1101
2025-07-01 05:44:54.789
2025-07-01 05:44:54.800 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:54.810 g = []
2025-07-01 05:44:54.818 if alo < ahi:
2025-07-01 05:44:54.828 if blo < bhi:
2025-07-01 05:44:54.840 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:54.852 else:
2025-07-01 05:44:54.861 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:54.873 elif blo < bhi:
2025-07-01 05:44:54.883 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:54.891
2025-07-01 05:44:54.898 > yield from g
2025-07-01 05:44:54.909
2025-07-01 05:44:54.918 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:54.927 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:54.937
2025-07-01 05:44:54.947 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:54.958 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:54.968 alo = 57, ahi = 1101
2025-07-01 05:44:54.981 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:54.988 blo = 57, bhi = 1101
2025-07-01 05:44:54.995
2025-07-01 05:44:55.002 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:55.010 r"""
2025-07-01 05:44:55.016 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:55.022 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:55.029 synch point, and intraline difference marking is done on the
2025-07-01 05:44:55.037 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:55.043
2025-07-01 05:44:55.049 Example:
2025-07-01 05:44:55.055
2025-07-01 05:44:55.061 >>> d = Differ()
2025-07-01 05:44:55.067 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:55.072 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:55.078 >>> print(''.join(results), end="")
2025-07-01 05:44:55.084 - abcDefghiJkl
2025-07-01 05:44:55.101 + abcdefGhijkl
2025-07-01 05:44:55.118 """
2025-07-01 05:44:55.124
2025-07-01 05:44:55.130 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:55.136 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:55.143 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:55.151 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:55.156 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:55.162
2025-07-01 05:44:55.168 # search for the pair that matches best without being identical
2025-07-01 05:44:55.175 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:55.187 # on junk -- unless we have to)
2025-07-01 05:44:55.197 for j in range(blo, bhi):
2025-07-01 05:44:55.204 bj = b[j]
2025-07-01 05:44:55.211 cruncher.set_seq2(bj)
2025-07-01 05:44:55.217 for i in range(alo, ahi):
2025-07-01 05:44:55.229 ai = a[i]
2025-07-01 05:44:55.240 if ai == bj:
2025-07-01 05:44:55.249 if eqi is None:
2025-07-01 05:44:55.258 eqi, eqj = i, j
2025-07-01 05:44:55.269 continue
2025-07-01 05:44:55.279 cruncher.set_seq1(ai)
2025-07-01 05:44:55.287 # computing similarity is expensive, so use the quick
2025-07-01 05:44:55.295 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:55.302 # compares by a factor of 3.
2025-07-01 05:44:55.310 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:55.318 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:55.325 # of the computation is cached by cruncher
2025-07-01 05:44:55.331 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:55.338 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:55.350 cruncher.ratio() > best_ratio:
2025-07-01 05:44:55.359 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:55.365 if best_ratio < cutoff:
2025-07-01 05:44:55.371 # no non-identical "pretty close" pair
2025-07-01 05:44:55.375 if eqi is None:
2025-07-01 05:44:55.380 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:55.385 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:55.390 return
2025-07-01 05:44:55.395 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:55.401 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:55.406 else:
2025-07-01 05:44:55.414 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:55.420 eqi = None
2025-07-01 05:44:55.425
2025-07-01 05:44:55.431 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:55.438 # identical
2025-07-01 05:44:55.443
2025-07-01 05:44:55.448 # pump out diffs from before the synch point
2025-07-01 05:44:55.452 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:55.457
2025-07-01 05:44:55.462 # do intraline marking on the synch pair
2025-07-01 05:44:55.466 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:55.471 if eqi is None:
2025-07-01 05:44:55.476 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:55.480 atags = btags = ""
2025-07-01 05:44:55.486 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:55.492 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:55.496 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:55.501 if tag == 'replace':
2025-07-01 05:44:55.506 atags += '^' * la
2025-07-01 05:44:55.511 btags += '^' * lb
2025-07-01 05:44:55.517 elif tag == 'delete':
2025-07-01 05:44:55.523 atags += '-' * la
2025-07-01 05:44:55.529 elif tag == 'insert':
2025-07-01 05:44:55.535 btags += '+' * lb
2025-07-01 05:44:55.541 elif tag == 'equal':
2025-07-01 05:44:55.547 atags += ' ' * la
2025-07-01 05:44:55.554 btags += ' ' * lb
2025-07-01 05:44:55.561 else:
2025-07-01 05:44:55.567 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:55.574 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:55.584 else:
2025-07-01 05:44:55.590 # the synch pair is identical
2025-07-01 05:44:55.595 yield ' ' + aelt
2025-07-01 05:44:55.600
2025-07-01 05:44:55.606 # pump out diffs from after the synch point
2025-07-01 05:44:55.611 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:55.616
2025-07-01 05:44:55.622 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:55.628 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:55.638
2025-07-01 05:44:55.645 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:55.652 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:55.659 alo = 58, ahi = 1101
2025-07-01 05:44:55.666 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:55.675 blo = 58, bhi = 1101
2025-07-01 05:44:55.683
2025-07-01 05:44:55.693 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:55.701 g = []
2025-07-01 05:44:55.707 if alo < ahi:
2025-07-01 05:44:55.712 if blo < bhi:
2025-07-01 05:44:55.717 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:55.722 else:
2025-07-01 05:44:55.727 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:55.732 elif blo < bhi:
2025-07-01 05:44:55.737 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:55.741
2025-07-01 05:44:55.746 > yield from g
2025-07-01 05:44:55.750
2025-07-01 05:44:55.757 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:55.763 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:55.770
2025-07-01 05:44:55.781 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:55.790 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:55.802 alo = 58, ahi = 1101
2025-07-01 05:44:55.814 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:55.825 blo = 58, bhi = 1101
2025-07-01 05:44:55.835
2025-07-01 05:44:55.842 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:55.854 r"""
2025-07-01 05:44:55.864 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:55.871 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:55.879 synch point, and intraline difference marking is done on the
2025-07-01 05:44:55.887 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:55.898
2025-07-01 05:44:55.906 Example:
2025-07-01 05:44:55.919
2025-07-01 05:44:55.930 >>> d = Differ()
2025-07-01 05:44:55.941 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:55.952 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:55.961 >>> print(''.join(results), end="")
2025-07-01 05:44:55.967 - abcDefghiJkl
2025-07-01 05:44:55.983 + abcdefGhijkl
2025-07-01 05:44:55.999 """
2025-07-01 05:44:56.006
2025-07-01 05:44:56.012 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:56.017 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:56.028 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:56.037 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:56.044 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:56.050
2025-07-01 05:44:56.055 # search for the pair that matches best without being identical
2025-07-01 05:44:56.060 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:56.066 # on junk -- unless we have to)
2025-07-01 05:44:56.076 for j in range(blo, bhi):
2025-07-01 05:44:56.085 bj = b[j]
2025-07-01 05:44:56.093 cruncher.set_seq2(bj)
2025-07-01 05:44:56.099 for i in range(alo, ahi):
2025-07-01 05:44:56.111 ai = a[i]
2025-07-01 05:44:56.122 if ai == bj:
2025-07-01 05:44:56.131 if eqi is None:
2025-07-01 05:44:56.144 eqi, eqj = i, j
2025-07-01 05:44:56.153 continue
2025-07-01 05:44:56.162 cruncher.set_seq1(ai)
2025-07-01 05:44:56.169 # computing similarity is expensive, so use the quick
2025-07-01 05:44:56.176 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:56.182 # compares by a factor of 3.
2025-07-01 05:44:56.187 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:56.191 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:56.197 # of the computation is cached by cruncher
2025-07-01 05:44:56.203 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:56.208 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:56.215 cruncher.ratio() > best_ratio:
2025-07-01 05:44:56.224 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:56.232 if best_ratio < cutoff:
2025-07-01 05:44:56.240 # no non-identical "pretty close" pair
2025-07-01 05:44:56.249 if eqi is None:
2025-07-01 05:44:56.261 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:56.276 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:56.286 return
2025-07-01 05:44:56.296 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:56.305 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:56.313 else:
2025-07-01 05:44:56.318 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:56.324 eqi = None
2025-07-01 05:44:56.330
2025-07-01 05:44:56.337 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:56.343 # identical
2025-07-01 05:44:56.350
2025-07-01 05:44:56.359 # pump out diffs from before the synch point
2025-07-01 05:44:56.366 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:56.373
2025-07-01 05:44:56.379 # do intraline marking on the synch pair
2025-07-01 05:44:56.385 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:56.390 if eqi is None:
2025-07-01 05:44:56.395 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:56.403 atags = btags = ""
2025-07-01 05:44:56.413 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:56.422 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:56.428 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:56.435 if tag == 'replace':
2025-07-01 05:44:56.445 atags += '^' * la
2025-07-01 05:44:56.453 btags += '^' * lb
2025-07-01 05:44:56.459 elif tag == 'delete':
2025-07-01 05:44:56.465 atags += '-' * la
2025-07-01 05:44:56.470 elif tag == 'insert':
2025-07-01 05:44:56.476 btags += '+' * lb
2025-07-01 05:44:56.482 elif tag == 'equal':
2025-07-01 05:44:56.487 atags += ' ' * la
2025-07-01 05:44:56.495 btags += ' ' * lb
2025-07-01 05:44:56.505 else:
2025-07-01 05:44:56.514 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:56.520 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:56.527 else:
2025-07-01 05:44:56.534 # the synch pair is identical
2025-07-01 05:44:56.541 yield ' ' + aelt
2025-07-01 05:44:56.548
2025-07-01 05:44:56.556 # pump out diffs from after the synch point
2025-07-01 05:44:56.566 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:56.573
2025-07-01 05:44:56.579 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:56.587 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:56.596
2025-07-01 05:44:56.602 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:56.615 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:56.623 alo = 59, ahi = 1101
2025-07-01 05:44:56.630 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:56.636 blo = 59, bhi = 1101
2025-07-01 05:44:56.645
2025-07-01 05:44:56.652 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:56.658 g = []
2025-07-01 05:44:56.667 if alo < ahi:
2025-07-01 05:44:56.675 if blo < bhi:
2025-07-01 05:44:56.686 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:56.695 else:
2025-07-01 05:44:56.703 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:56.711 elif blo < bhi:
2025-07-01 05:44:56.718 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:56.723
2025-07-01 05:44:56.729 > yield from g
2025-07-01 05:44:56.735
2025-07-01 05:44:56.741 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:56.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:56.759
2025-07-01 05:44:56.767 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:56.774 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:56.782 alo = 59, ahi = 1101
2025-07-01 05:44:56.794 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:56.803 blo = 59, bhi = 1101
2025-07-01 05:44:56.810
2025-07-01 05:44:56.820 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:56.830 r"""
2025-07-01 05:44:56.840 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:56.850 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:56.858 synch point, and intraline difference marking is done on the
2025-07-01 05:44:56.866 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:56.874
2025-07-01 05:44:56.880 Example:
2025-07-01 05:44:56.886
2025-07-01 05:44:56.897 >>> d = Differ()
2025-07-01 05:44:56.906 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:56.914 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:56.920 >>> print(''.join(results), end="")
2025-07-01 05:44:56.928 - abcDefghiJkl
2025-07-01 05:44:56.951 + abcdefGhijkl
2025-07-01 05:44:56.963 """
2025-07-01 05:44:56.969
2025-07-01 05:44:56.975 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:56.981 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:56.987 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:56.992 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:56.998 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:57.003
2025-07-01 05:44:57.009 # search for the pair that matches best without being identical
2025-07-01 05:44:57.015 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:57.022 # on junk -- unless we have to)
2025-07-01 05:44:57.029 for j in range(blo, bhi):
2025-07-01 05:44:57.036 bj = b[j]
2025-07-01 05:44:57.044 cruncher.set_seq2(bj)
2025-07-01 05:44:57.051 for i in range(alo, ahi):
2025-07-01 05:44:57.058 ai = a[i]
2025-07-01 05:44:57.065 if ai == bj:
2025-07-01 05:44:57.072 if eqi is None:
2025-07-01 05:44:57.079 eqi, eqj = i, j
2025-07-01 05:44:57.086 continue
2025-07-01 05:44:57.093 cruncher.set_seq1(ai)
2025-07-01 05:44:57.100 # computing similarity is expensive, so use the quick
2025-07-01 05:44:57.106 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:57.114 # compares by a factor of 3.
2025-07-01 05:44:57.122 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:57.128 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:57.133 # of the computation is cached by cruncher
2025-07-01 05:44:57.144 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:57.157 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:57.166 cruncher.ratio() > best_ratio:
2025-07-01 05:44:57.177 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:57.186 if best_ratio < cutoff:
2025-07-01 05:44:57.193 # no non-identical "pretty close" pair
2025-07-01 05:44:57.200 if eqi is None:
2025-07-01 05:44:57.206 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:57.211 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:57.216 return
2025-07-01 05:44:57.221 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:57.226 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:57.230 else:
2025-07-01 05:44:57.235 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:57.241 eqi = None
2025-07-01 05:44:57.246
2025-07-01 05:44:57.251 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:57.256 # identical
2025-07-01 05:44:57.261
2025-07-01 05:44:57.269 # pump out diffs from before the synch point
2025-07-01 05:44:57.275 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:57.280
2025-07-01 05:44:57.285 # do intraline marking on the synch pair
2025-07-01 05:44:57.290 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:57.295 if eqi is None:
2025-07-01 05:44:57.307 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:57.315 atags = btags = ""
2025-07-01 05:44:57.328 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:57.340 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:57.351 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:57.358 if tag == 'replace':
2025-07-01 05:44:57.369 atags += '^' * la
2025-07-01 05:44:57.380 btags += '^' * lb
2025-07-01 05:44:57.393 elif tag == 'delete':
2025-07-01 05:44:57.402 atags += '-' * la
2025-07-01 05:44:57.411 elif tag == 'insert':
2025-07-01 05:44:57.418 btags += '+' * lb
2025-07-01 05:44:57.425 elif tag == 'equal':
2025-07-01 05:44:57.431 atags += ' ' * la
2025-07-01 05:44:57.438 btags += ' ' * lb
2025-07-01 05:44:57.448 else:
2025-07-01 05:44:57.456 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:57.463 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:57.469 else:
2025-07-01 05:44:57.476 # the synch pair is identical
2025-07-01 05:44:57.482 yield ' ' + aelt
2025-07-01 05:44:57.493
2025-07-01 05:44:57.503 # pump out diffs from after the synch point
2025-07-01 05:44:57.513 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:57.520
2025-07-01 05:44:57.526 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:57.530 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:57.535
2025-07-01 05:44:57.540 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:57.547 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:57.553 alo = 60, ahi = 1101
2025-07-01 05:44:57.566 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:57.575 blo = 60, bhi = 1101
2025-07-01 05:44:57.583
2025-07-01 05:44:57.589 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:57.593 g = []
2025-07-01 05:44:57.597 if alo < ahi:
2025-07-01 05:44:57.602 if blo < bhi:
2025-07-01 05:44:57.606 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:57.611 else:
2025-07-01 05:44:57.615 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:57.619 elif blo < bhi:
2025-07-01 05:44:57.624 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:57.628
2025-07-01 05:44:57.634 > yield from g
2025-07-01 05:44:57.639
2025-07-01 05:44:57.644 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:57.650 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:57.655
2025-07-01 05:44:57.660 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:57.665 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:57.669 alo = 60, ahi = 1101
2025-07-01 05:44:57.675 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:57.682 blo = 60, bhi = 1101
2025-07-01 05:44:57.693
2025-07-01 05:44:57.702 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:57.708 r"""
2025-07-01 05:44:57.715 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:57.727 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:57.736 synch point, and intraline difference marking is done on the
2025-07-01 05:44:57.743 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:57.749
2025-07-01 05:44:57.756 Example:
2025-07-01 05:44:57.762
2025-07-01 05:44:57.773 >>> d = Differ()
2025-07-01 05:44:57.782 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:57.790 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:57.797 >>> print(''.join(results), end="")
2025-07-01 05:44:57.806 - abcDefghiJkl
2025-07-01 05:44:57.829 + abcdefGhijkl
2025-07-01 05:44:57.843 """
2025-07-01 05:44:57.849
2025-07-01 05:44:57.856 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:57.863 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:57.871 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:57.881 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:57.889 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:57.895
2025-07-01 05:44:57.902 # search for the pair that matches best without being identical
2025-07-01 05:44:57.912 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:57.921 # on junk -- unless we have to)
2025-07-01 05:44:57.929 for j in range(blo, bhi):
2025-07-01 05:44:57.935 bj = b[j]
2025-07-01 05:44:57.942 cruncher.set_seq2(bj)
2025-07-01 05:44:57.947 for i in range(alo, ahi):
2025-07-01 05:44:57.955 ai = a[i]
2025-07-01 05:44:57.964 if ai == bj:
2025-07-01 05:44:57.973 if eqi is None:
2025-07-01 05:44:57.981 eqi, eqj = i, j
2025-07-01 05:44:57.987 continue
2025-07-01 05:44:57.993 cruncher.set_seq1(ai)
2025-07-01 05:44:57.999 # computing similarity is expensive, so use the quick
2025-07-01 05:44:58.011 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:58.020 # compares by a factor of 3.
2025-07-01 05:44:58.027 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:58.035 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:58.043 # of the computation is cached by cruncher
2025-07-01 05:44:58.051 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:58.060 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:58.068 cruncher.ratio() > best_ratio:
2025-07-01 05:44:58.075 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:58.082 if best_ratio < cutoff:
2025-07-01 05:44:58.089 # no non-identical "pretty close" pair
2025-07-01 05:44:58.100 if eqi is None:
2025-07-01 05:44:58.110 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:58.124 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:58.136 return
2025-07-01 05:44:58.149 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:58.162 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:58.172 else:
2025-07-01 05:44:58.183 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:58.195 eqi = None
2025-07-01 05:44:58.208
2025-07-01 05:44:58.222 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:58.233 # identical
2025-07-01 05:44:58.244
2025-07-01 05:44:58.254 # pump out diffs from before the synch point
2025-07-01 05:44:58.262 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:58.273
2025-07-01 05:44:58.283 # do intraline marking on the synch pair
2025-07-01 05:44:58.292 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:58.299 if eqi is None:
2025-07-01 05:44:58.311 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:58.320 atags = btags = ""
2025-07-01 05:44:58.328 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:58.336 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:58.343 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:58.349 if tag == 'replace':
2025-07-01 05:44:58.355 atags += '^' * la
2025-07-01 05:44:58.361 btags += '^' * lb
2025-07-01 05:44:58.367 elif tag == 'delete':
2025-07-01 05:44:58.376 atags += '-' * la
2025-07-01 05:44:58.387 elif tag == 'insert':
2025-07-01 05:44:58.395 btags += '+' * lb
2025-07-01 05:44:58.404 elif tag == 'equal':
2025-07-01 05:44:58.408 atags += ' ' * la
2025-07-01 05:44:58.413 btags += ' ' * lb
2025-07-01 05:44:58.419 else:
2025-07-01 05:44:58.425 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:58.432 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:58.438 else:
2025-07-01 05:44:58.445 # the synch pair is identical
2025-07-01 05:44:58.451 yield ' ' + aelt
2025-07-01 05:44:58.458
2025-07-01 05:44:58.470 # pump out diffs from after the synch point
2025-07-01 05:44:58.479 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:58.486
2025-07-01 05:44:58.492 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:58.498 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:58.503
2025-07-01 05:44:58.507 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:58.514 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:58.519 alo = 61, ahi = 1101
2025-07-01 05:44:58.532 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:58.539 blo = 61, bhi = 1101
2025-07-01 05:44:58.547
2025-07-01 05:44:58.554 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:58.565 g = []
2025-07-01 05:44:58.576 if alo < ahi:
2025-07-01 05:44:58.584 if blo < bhi:
2025-07-01 05:44:58.597 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:58.606 else:
2025-07-01 05:44:58.613 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:58.619 elif blo < bhi:
2025-07-01 05:44:58.627 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:58.637
2025-07-01 05:44:58.646 > yield from g
2025-07-01 05:44:58.654
2025-07-01 05:44:58.662 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:58.671 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:58.680
2025-07-01 05:44:58.688 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:58.696 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:58.702 alo = 61, ahi = 1101
2025-07-01 05:44:58.709 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:58.715 blo = 61, bhi = 1101
2025-07-01 05:44:58.720
2025-07-01 05:44:58.726 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:58.731 r"""
2025-07-01 05:44:58.739 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:58.750 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:58.758 synch point, and intraline difference marking is done on the
2025-07-01 05:44:58.764 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:58.770
2025-07-01 05:44:58.779 Example:
2025-07-01 05:44:58.788
2025-07-01 05:44:58.795 >>> d = Differ()
2025-07-01 05:44:58.802 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:58.809 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:58.815 >>> print(''.join(results), end="")
2025-07-01 05:44:58.820 - abcDefghiJkl
2025-07-01 05:44:58.832 + abcdefGhijkl
2025-07-01 05:44:58.844 """
2025-07-01 05:44:58.849
2025-07-01 05:44:58.854 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:58.859 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:58.863 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:58.868 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:58.872 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:58.877
2025-07-01 05:44:58.881 # search for the pair that matches best without being identical
2025-07-01 05:44:58.889 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:58.900 # on junk -- unless we have to)
2025-07-01 05:44:58.908 for j in range(blo, bhi):
2025-07-01 05:44:58.914 bj = b[j]
2025-07-01 05:44:58.926 cruncher.set_seq2(bj)
2025-07-01 05:44:58.935 for i in range(alo, ahi):
2025-07-01 05:44:58.941 ai = a[i]
2025-07-01 05:44:58.946 if ai == bj:
2025-07-01 05:44:58.952 if eqi is None:
2025-07-01 05:44:58.964 eqi, eqj = i, j
2025-07-01 05:44:58.974 continue
2025-07-01 05:44:58.980 cruncher.set_seq1(ai)
2025-07-01 05:44:58.986 # computing similarity is expensive, so use the quick
2025-07-01 05:44:58.993 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:59.002 # compares by a factor of 3.
2025-07-01 05:44:59.014 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:59.024 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:59.034 # of the computation is cached by cruncher
2025-07-01 05:44:59.047 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:59.057 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:59.065 cruncher.ratio() > best_ratio:
2025-07-01 05:44:59.073 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:59.079 if best_ratio < cutoff:
2025-07-01 05:44:59.086 # no non-identical "pretty close" pair
2025-07-01 05:44:59.093 if eqi is None:
2025-07-01 05:44:59.099 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:59.106 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:59.115 return
2025-07-01 05:44:59.125 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:44:59.132 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:44:59.138 else:
2025-07-01 05:44:59.146 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:44:59.155 eqi = None
2025-07-01 05:44:59.162
2025-07-01 05:44:59.171 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:44:59.184 # identical
2025-07-01 05:44:59.194
2025-07-01 05:44:59.200 # pump out diffs from before the synch point
2025-07-01 05:44:59.206 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:44:59.212
2025-07-01 05:44:59.219 # do intraline marking on the synch pair
2025-07-01 05:44:59.226 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:44:59.232 if eqi is None:
2025-07-01 05:44:59.238 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:44:59.244 atags = btags = ""
2025-07-01 05:44:59.250 cruncher.set_seqs(aelt, belt)
2025-07-01 05:44:59.256 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:44:59.263 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:44:59.270 if tag == 'replace':
2025-07-01 05:44:59.277 atags += '^' * la
2025-07-01 05:44:59.284 btags += '^' * lb
2025-07-01 05:44:59.291 elif tag == 'delete':
2025-07-01 05:44:59.299 atags += '-' * la
2025-07-01 05:44:59.307 elif tag == 'insert':
2025-07-01 05:44:59.314 btags += '+' * lb
2025-07-01 05:44:59.320 elif tag == 'equal':
2025-07-01 05:44:59.327 atags += ' ' * la
2025-07-01 05:44:59.334 btags += ' ' * lb
2025-07-01 05:44:59.346 else:
2025-07-01 05:44:59.353 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:44:59.358 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:44:59.363 else:
2025-07-01 05:44:59.368 # the synch pair is identical
2025-07-01 05:44:59.373 yield ' ' + aelt
2025-07-01 05:44:59.377
2025-07-01 05:44:59.383 # pump out diffs from after the synch point
2025-07-01 05:44:59.389 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:44:59.395
2025-07-01 05:44:59.400 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:44:59.405 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:59.410
2025-07-01 05:44:59.414 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:59.419 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:59.424 alo = 62, ahi = 1101
2025-07-01 05:44:59.429 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:59.435 blo = 62, bhi = 1101
2025-07-01 05:44:59.440
2025-07-01 05:44:59.446 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:59.452 g = []
2025-07-01 05:44:59.458 if alo < ahi:
2025-07-01 05:44:59.466 if blo < bhi:
2025-07-01 05:44:59.474 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:44:59.481 else:
2025-07-01 05:44:59.488 g = self._dump('-', a, alo, ahi)
2025-07-01 05:44:59.495 elif blo < bhi:
2025-07-01 05:44:59.502 g = self._dump('+', b, blo, bhi)
2025-07-01 05:44:59.508
2025-07-01 05:44:59.515 > yield from g
2025-07-01 05:44:59.523
2025-07-01 05:44:59.535 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:44:59.546 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:44:59.553
2025-07-01 05:44:59.560 self = <difflib.Differ object at [hex]>
2025-07-01 05:44:59.568 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:44:59.574 alo = 62, ahi = 1101
2025-07-01 05:44:59.582 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:44:59.593 blo = 62, bhi = 1101
2025-07-01 05:44:59.603
2025-07-01 05:44:59.610 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:44:59.616 r"""
2025-07-01 05:44:59.622 When replacing one block of lines with another, search the blocks
2025-07-01 05:44:59.627 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:44:59.632 synch point, and intraline difference marking is done on the
2025-07-01 05:44:59.636 similar pair. Lots of work, but often worth it.
2025-07-01 05:44:59.641
2025-07-01 05:44:59.648 Example:
2025-07-01 05:44:59.653
2025-07-01 05:44:59.659 >>> d = Differ()
2025-07-01 05:44:59.665 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:44:59.671 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:44:59.678 >>> print(''.join(results), end="")
2025-07-01 05:44:59.685 - abcDefghiJkl
2025-07-01 05:44:59.699 + abcdefGhijkl
2025-07-01 05:44:59.719 """
2025-07-01 05:44:59.725
2025-07-01 05:44:59.731 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:44:59.737 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:44:59.744 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:44:59.751 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:44:59.758 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:44:59.764
2025-07-01 05:44:59.771 # search for the pair that matches best without being identical
2025-07-01 05:44:59.779 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:44:59.791 # on junk -- unless we have to)
2025-07-01 05:44:59.799 for j in range(blo, bhi):
2025-07-01 05:44:59.806 bj = b[j]
2025-07-01 05:44:59.811 cruncher.set_seq2(bj)
2025-07-01 05:44:59.817 for i in range(alo, ahi):
2025-07-01 05:44:59.823 ai = a[i]
2025-07-01 05:44:59.830 if ai == bj:
2025-07-01 05:44:59.841 if eqi is None:
2025-07-01 05:44:59.849 eqi, eqj = i, j
2025-07-01 05:44:59.859 continue
2025-07-01 05:44:59.865 cruncher.set_seq1(ai)
2025-07-01 05:44:59.871 # computing similarity is expensive, so use the quick
2025-07-01 05:44:59.876 # upper bounds first -- have seen this speed up messy
2025-07-01 05:44:59.886 # compares by a factor of 3.
2025-07-01 05:44:59.897 # note that ratio() is only expensive to compute the first
2025-07-01 05:44:59.908 # time it's called on a sequence pair; the expensive part
2025-07-01 05:44:59.919 # of the computation is cached by cruncher
2025-07-01 05:44:59.928 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:44:59.936 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:44:59.945 cruncher.ratio() > best_ratio:
2025-07-01 05:44:59.951 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:44:59.959 if best_ratio < cutoff:
2025-07-01 05:44:59.970 # no non-identical "pretty close" pair
2025-07-01 05:44:59.978 if eqi is None:
2025-07-01 05:44:59.987 # no identical pair either -- treat it as a straight replace
2025-07-01 05:44:59.995 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:00.005 return
2025-07-01 05:45:00.017 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:00.026 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:00.036 else:
2025-07-01 05:45:00.045 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:00.052 eqi = None
2025-07-01 05:45:00.058
2025-07-01 05:45:00.065 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:00.079 # identical
2025-07-01 05:45:00.089
2025-07-01 05:45:00.097 # pump out diffs from before the synch point
2025-07-01 05:45:00.104 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:00.110
2025-07-01 05:45:00.118 # do intraline marking on the synch pair
2025-07-01 05:45:00.126 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:00.134 if eqi is None:
2025-07-01 05:45:00.144 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:00.152 atags = btags = ""
2025-07-01 05:45:00.158 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:00.166 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:00.177 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:00.184 if tag == 'replace':
2025-07-01 05:45:00.191 atags += '^' * la
2025-07-01 05:45:00.199 btags += '^' * lb
2025-07-01 05:45:00.209 elif tag == 'delete':
2025-07-01 05:45:00.217 atags += '-' * la
2025-07-01 05:45:00.222 elif tag == 'insert':
2025-07-01 05:45:00.228 btags += '+' * lb
2025-07-01 05:45:00.234 elif tag == 'equal':
2025-07-01 05:45:00.240 atags += ' ' * la
2025-07-01 05:45:00.246 btags += ' ' * lb
2025-07-01 05:45:00.256 else:
2025-07-01 05:45:00.267 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:00.276 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:00.283 else:
2025-07-01 05:45:00.289 # the synch pair is identical
2025-07-01 05:45:00.295 yield ' ' + aelt
2025-07-01 05:45:00.302
2025-07-01 05:45:00.309 # pump out diffs from after the synch point
2025-07-01 05:45:00.314 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:00.319
2025-07-01 05:45:00.329 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:00.337 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:00.345
2025-07-01 05:45:00.352 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:00.361 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:00.371 alo = 63, ahi = 1101
2025-07-01 05:45:00.382 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:00.391 blo = 63, bhi = 1101
2025-07-01 05:45:00.398
2025-07-01 05:45:00.405 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:00.411 g = []
2025-07-01 05:45:00.419 if alo < ahi:
2025-07-01 05:45:00.426 if blo < bhi:
2025-07-01 05:45:00.432 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:00.438 else:
2025-07-01 05:45:00.444 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:00.449 elif blo < bhi:
2025-07-01 05:45:00.455 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:00.461
2025-07-01 05:45:00.467 > yield from g
2025-07-01 05:45:00.473
2025-07-01 05:45:00.480 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:00.487 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:00.494
2025-07-01 05:45:00.507 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:00.517 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:00.527 alo = 63, ahi = 1101
2025-07-01 05:45:00.535 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:00.541 blo = 63, bhi = 1101
2025-07-01 05:45:00.547
2025-07-01 05:45:00.551 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:00.556 r"""
2025-07-01 05:45:00.561 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:00.567 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:00.572 synch point, and intraline difference marking is done on the
2025-07-01 05:45:00.577 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:00.583
2025-07-01 05:45:00.589 Example:
2025-07-01 05:45:00.594
2025-07-01 05:45:00.600 >>> d = Differ()
2025-07-01 05:45:00.612 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:00.622 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:00.629 >>> print(''.join(results), end="")
2025-07-01 05:45:00.635 - abcDefghiJkl
2025-07-01 05:45:00.646 + abcdefGhijkl
2025-07-01 05:45:00.656 """
2025-07-01 05:45:00.662
2025-07-01 05:45:00.667 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:00.674 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:00.680 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:00.687 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:00.692 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:00.698
2025-07-01 05:45:00.704 # search for the pair that matches best without being identical
2025-07-01 05:45:00.710 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:00.715 # on junk -- unless we have to)
2025-07-01 05:45:00.724 for j in range(blo, bhi):
2025-07-01 05:45:00.734 bj = b[j]
2025-07-01 05:45:00.747 cruncher.set_seq2(bj)
2025-07-01 05:45:00.758 for i in range(alo, ahi):
2025-07-01 05:45:00.771 ai = a[i]
2025-07-01 05:45:00.779 if ai == bj:
2025-07-01 05:45:00.785 if eqi is None:
2025-07-01 05:45:00.790 eqi, eqj = i, j
2025-07-01 05:45:00.795 continue
2025-07-01 05:45:00.799 cruncher.set_seq1(ai)
2025-07-01 05:45:00.804 # computing similarity is expensive, so use the quick
2025-07-01 05:45:00.808 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:00.813 # compares by a factor of 3.
2025-07-01 05:45:00.817 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:00.822 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:00.826 # of the computation is cached by cruncher
2025-07-01 05:45:00.831 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:00.836 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:00.840 cruncher.ratio() > best_ratio:
2025-07-01 05:45:00.845 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:00.851 if best_ratio < cutoff:
2025-07-01 05:45:00.856 # no non-identical "pretty close" pair
2025-07-01 05:45:00.861 if eqi is None:
2025-07-01 05:45:00.866 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:00.870 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:00.874 return
2025-07-01 05:45:00.879 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:00.883 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:00.888 else:
2025-07-01 05:45:00.892 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:00.896 eqi = None
2025-07-01 05:45:00.901
2025-07-01 05:45:00.905 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:00.909 # identical
2025-07-01 05:45:00.913
2025-07-01 05:45:00.917 # pump out diffs from before the synch point
2025-07-01 05:45:00.922 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:00.926
2025-07-01 05:45:00.930 # do intraline marking on the synch pair
2025-07-01 05:45:00.935 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:00.939 if eqi is None:
2025-07-01 05:45:00.944 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:00.948 atags = btags = ""
2025-07-01 05:45:00.953 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:00.957 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:00.961 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:00.966 if tag == 'replace':
2025-07-01 05:45:00.970 atags += '^' * la
2025-07-01 05:45:00.975 btags += '^' * lb
2025-07-01 05:45:00.979 elif tag == 'delete':
2025-07-01 05:45:00.984 atags += '-' * la
2025-07-01 05:45:00.988 elif tag == 'insert':
2025-07-01 05:45:00.993 btags += '+' * lb
2025-07-01 05:45:00.997 elif tag == 'equal':
2025-07-01 05:45:01.001 atags += ' ' * la
2025-07-01 05:45:01.006 btags += ' ' * lb
2025-07-01 05:45:01.010 else:
2025-07-01 05:45:01.014 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:01.019 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:01.023 else:
2025-07-01 05:45:01.028 # the synch pair is identical
2025-07-01 05:45:01.032 yield ' ' + aelt
2025-07-01 05:45:01.036
2025-07-01 05:45:01.041 # pump out diffs from after the synch point
2025-07-01 05:45:01.046 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:01.050
2025-07-01 05:45:01.054 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:01.059 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:01.064
2025-07-01 05:45:01.068 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:01.073 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:01.077 alo = 64, ahi = 1101
2025-07-01 05:45:01.082 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:01.087 blo = 64, bhi = 1101
2025-07-01 05:45:01.092
2025-07-01 05:45:01.096 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:01.100 g = []
2025-07-01 05:45:01.105 if alo < ahi:
2025-07-01 05:45:01.109 if blo < bhi:
2025-07-01 05:45:01.114 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:01.118 else:
2025-07-01 05:45:01.122 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:01.127 elif blo < bhi:
2025-07-01 05:45:01.132 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:01.136
2025-07-01 05:45:01.141 > yield from g
2025-07-01 05:45:01.146
2025-07-01 05:45:01.150 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:01.155 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:01.160
2025-07-01 05:45:01.164 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:01.169 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:01.181 alo = 64, ahi = 1101
2025-07-01 05:45:01.190 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:01.198 blo = 64, bhi = 1101
2025-07-01 05:45:01.207
2025-07-01 05:45:01.218 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:01.228 r"""
2025-07-01 05:45:01.239 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:01.249 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:01.260 synch point, and intraline difference marking is done on the
2025-07-01 05:45:01.270 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:01.277
2025-07-01 05:45:01.282 Example:
2025-07-01 05:45:01.288
2025-07-01 05:45:01.294 >>> d = Differ()
2025-07-01 05:45:01.303 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:01.311 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:01.319 >>> print(''.join(results), end="")
2025-07-01 05:45:01.324 - abcDefghiJkl
2025-07-01 05:45:01.339 + abcdefGhijkl
2025-07-01 05:45:01.364 """
2025-07-01 05:45:01.375
2025-07-01 05:45:01.385 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:01.396 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:01.403 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:01.409 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:01.414 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:01.419
2025-07-01 05:45:01.426 # search for the pair that matches best without being identical
2025-07-01 05:45:01.433 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:01.439 # on junk -- unless we have to)
2025-07-01 05:45:01.448 for j in range(blo, bhi):
2025-07-01 05:45:01.456 bj = b[j]
2025-07-01 05:45:01.464 cruncher.set_seq2(bj)
2025-07-01 05:45:01.471 for i in range(alo, ahi):
2025-07-01 05:45:01.479 ai = a[i]
2025-07-01 05:45:01.492 if ai == bj:
2025-07-01 05:45:01.501 if eqi is None:
2025-07-01 05:45:01.508 eqi, eqj = i, j
2025-07-01 05:45:01.514 continue
2025-07-01 05:45:01.522 cruncher.set_seq1(ai)
2025-07-01 05:45:01.529 # computing similarity is expensive, so use the quick
2025-07-01 05:45:01.535 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:01.543 # compares by a factor of 3.
2025-07-01 05:45:01.550 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:01.557 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:01.568 # of the computation is cached by cruncher
2025-07-01 05:45:01.576 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:01.582 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:01.588 cruncher.ratio() > best_ratio:
2025-07-01 05:45:01.595 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:01.604 if best_ratio < cutoff:
2025-07-01 05:45:01.613 # no non-identical "pretty close" pair
2025-07-01 05:45:01.620 if eqi is None:
2025-07-01 05:45:01.627 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:01.635 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:01.646 return
2025-07-01 05:45:01.660 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:01.670 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:01.677 else:
2025-07-01 05:45:01.686 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:01.695 eqi = None
2025-07-01 05:45:01.706
2025-07-01 05:45:01.713 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:01.719 # identical
2025-07-01 05:45:01.726
2025-07-01 05:45:01.735 # pump out diffs from before the synch point
2025-07-01 05:45:01.747 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:01.755
2025-07-01 05:45:01.761 # do intraline marking on the synch pair
2025-07-01 05:45:01.766 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:01.771 if eqi is None:
2025-07-01 05:45:01.777 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:01.783 atags = btags = ""
2025-07-01 05:45:01.789 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:01.795 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:01.801 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:01.807 if tag == 'replace':
2025-07-01 05:45:01.814 atags += '^' * la
2025-07-01 05:45:01.821 btags += '^' * lb
2025-07-01 05:45:01.827 elif tag == 'delete':
2025-07-01 05:45:01.839 atags += '-' * la
2025-07-01 05:45:01.849 elif tag == 'insert':
2025-07-01 05:45:01.860 btags += '+' * lb
2025-07-01 05:45:01.867 elif tag == 'equal':
2025-07-01 05:45:01.874 atags += ' ' * la
2025-07-01 05:45:01.880 btags += ' ' * lb
2025-07-01 05:45:01.886 else:
2025-07-01 05:45:01.892 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:01.899 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:01.905 else:
2025-07-01 05:45:01.911 # the synch pair is identical
2025-07-01 05:45:01.917 yield ' ' + aelt
2025-07-01 05:45:01.923
2025-07-01 05:45:01.930 # pump out diffs from after the synch point
2025-07-01 05:45:01.937 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:01.947
2025-07-01 05:45:01.956 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:01.972 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:01.985
2025-07-01 05:45:01.993 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:02.001 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:02.008 alo = 65, ahi = 1101
2025-07-01 05:45:02.015 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:02.022 blo = 65, bhi = 1101
2025-07-01 05:45:02.028
2025-07-01 05:45:02.035 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:02.041 g = []
2025-07-01 05:45:02.048 if alo < ahi:
2025-07-01 05:45:02.055 if blo < bhi:
2025-07-01 05:45:02.062 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:02.068 else:
2025-07-01 05:45:02.074 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:02.079 elif blo < bhi:
2025-07-01 05:45:02.084 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:02.089
2025-07-01 05:45:02.095 > yield from g
2025-07-01 05:45:02.100
2025-07-01 05:45:02.107 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:02.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:02.127
2025-07-01 05:45:02.135 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:02.143 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:02.151 alo = 65, ahi = 1101
2025-07-01 05:45:02.160 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:02.167 blo = 65, bhi = 1101
2025-07-01 05:45:02.175
2025-07-01 05:45:02.182 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:02.187 r"""
2025-07-01 05:45:02.193 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:02.198 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:02.204 synch point, and intraline difference marking is done on the
2025-07-01 05:45:02.210 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:02.216
2025-07-01 05:45:02.221 Example:
2025-07-01 05:45:02.227
2025-07-01 05:45:02.233 >>> d = Differ()
2025-07-01 05:45:02.242 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:02.254 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:02.263 >>> print(''.join(results), end="")
2025-07-01 05:45:02.271 - abcDefghiJkl
2025-07-01 05:45:02.285 + abcdefGhijkl
2025-07-01 05:45:02.298 """
2025-07-01 05:45:02.307
2025-07-01 05:45:02.315 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:02.324 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:02.331 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:02.336 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:02.343 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:02.352
2025-07-01 05:45:02.361 # search for the pair that matches best without being identical
2025-07-01 05:45:02.367 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:02.374 # on junk -- unless we have to)
2025-07-01 05:45:02.384 for j in range(blo, bhi):
2025-07-01 05:45:02.395 bj = b[j]
2025-07-01 05:45:02.403 cruncher.set_seq2(bj)
2025-07-01 05:45:02.410 for i in range(alo, ahi):
2025-07-01 05:45:02.419 ai = a[i]
2025-07-01 05:45:02.429 if ai == bj:
2025-07-01 05:45:02.442 if eqi is None:
2025-07-01 05:45:02.452 eqi, eqj = i, j
2025-07-01 05:45:02.461 continue
2025-07-01 05:45:02.469 cruncher.set_seq1(ai)
2025-07-01 05:45:02.475 # computing similarity is expensive, so use the quick
2025-07-01 05:45:02.482 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:02.490 # compares by a factor of 3.
2025-07-01 05:45:02.499 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:02.507 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:02.515 # of the computation is cached by cruncher
2025-07-01 05:45:02.525 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:02.534 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:02.542 cruncher.ratio() > best_ratio:
2025-07-01 05:45:02.549 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:02.559 if best_ratio < cutoff:
2025-07-01 05:45:02.567 # no non-identical "pretty close" pair
2025-07-01 05:45:02.573 if eqi is None:
2025-07-01 05:45:02.583 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:02.597 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:02.609 return
2025-07-01 05:45:02.617 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:02.625 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:02.631 else:
2025-07-01 05:45:02.639 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:02.650 eqi = None
2025-07-01 05:45:02.658
2025-07-01 05:45:02.664 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:02.669 # identical
2025-07-01 05:45:02.677
2025-07-01 05:45:02.688 # pump out diffs from before the synch point
2025-07-01 05:45:02.696 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:02.702
2025-07-01 05:45:02.708 # do intraline marking on the synch pair
2025-07-01 05:45:02.714 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:02.723 if eqi is None:
2025-07-01 05:45:02.730 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:02.738 atags = btags = ""
2025-07-01 05:45:02.744 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:02.750 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:02.756 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:02.762 if tag == 'replace':
2025-07-01 05:45:02.770 atags += '^' * la
2025-07-01 05:45:02.779 btags += '^' * lb
2025-07-01 05:45:02.788 elif tag == 'delete':
2025-07-01 05:45:02.797 atags += '-' * la
2025-07-01 05:45:02.804 elif tag == 'insert':
2025-07-01 05:45:02.810 btags += '+' * lb
2025-07-01 05:45:02.817 elif tag == 'equal':
2025-07-01 05:45:02.822 atags += ' ' * la
2025-07-01 05:45:02.828 btags += ' ' * lb
2025-07-01 05:45:02.834 else:
2025-07-01 05:45:02.841 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:02.847 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:02.853 else:
2025-07-01 05:45:02.859 # the synch pair is identical
2025-07-01 05:45:02.867 yield ' ' + aelt
2025-07-01 05:45:02.874
2025-07-01 05:45:02.880 # pump out diffs from after the synch point
2025-07-01 05:45:02.885 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:02.890
2025-07-01 05:45:02.898 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:02.907 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:02.914
2025-07-01 05:45:02.922 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:02.930 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:02.936 alo = 66, ahi = 1101
2025-07-01 05:45:02.942 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:02.947 blo = 66, bhi = 1101
2025-07-01 05:45:02.955
2025-07-01 05:45:02.967 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:02.975 g = []
2025-07-01 05:45:02.982 if alo < ahi:
2025-07-01 05:45:02.987 if blo < bhi:
2025-07-01 05:45:02.992 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:02.997 else:
2025-07-01 05:45:03.004 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:03.017 elif blo < bhi:
2025-07-01 05:45:03.029 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:03.039
2025-07-01 05:45:03.047 > yield from g
2025-07-01 05:45:03.054
2025-07-01 05:45:03.064 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:03.074 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:03.082
2025-07-01 05:45:03.093 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:03.102 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:03.114 alo = 66, ahi = 1101
2025-07-01 05:45:03.128 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:03.136 blo = 66, bhi = 1101
2025-07-01 05:45:03.143
2025-07-01 05:45:03.149 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:03.156 r"""
2025-07-01 05:45:03.163 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:03.170 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:03.181 synch point, and intraline difference marking is done on the
2025-07-01 05:45:03.191 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:03.202
2025-07-01 05:45:03.213 Example:
2025-07-01 05:45:03.225
2025-07-01 05:45:03.236 >>> d = Differ()
2025-07-01 05:45:03.245 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:03.254 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:03.266 >>> print(''.join(results), end="")
2025-07-01 05:45:03.275 - abcDefghiJkl
2025-07-01 05:45:03.293 + abcdefGhijkl
2025-07-01 05:45:03.313 """
2025-07-01 05:45:03.322
2025-07-01 05:45:03.329 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:03.335 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:03.348 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:03.356 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:03.364 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:03.372
2025-07-01 05:45:03.378 # search for the pair that matches best without being identical
2025-07-01 05:45:03.384 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:03.390 # on junk -- unless we have to)
2025-07-01 05:45:03.400 for j in range(blo, bhi):
2025-07-01 05:45:03.409 bj = b[j]
2025-07-01 05:45:03.421 cruncher.set_seq2(bj)
2025-07-01 05:45:03.433 for i in range(alo, ahi):
2025-07-01 05:45:03.443 ai = a[i]
2025-07-01 05:45:03.454 if ai == bj:
2025-07-01 05:45:03.464 if eqi is None:
2025-07-01 05:45:03.475 eqi, eqj = i, j
2025-07-01 05:45:03.486 continue
2025-07-01 05:45:03.495 cruncher.set_seq1(ai)
2025-07-01 05:45:03.507 # computing similarity is expensive, so use the quick
2025-07-01 05:45:03.515 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:03.521 # compares by a factor of 3.
2025-07-01 05:45:03.533 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:03.546 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:03.556 # of the computation is cached by cruncher
2025-07-01 05:45:03.565 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:03.575 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:03.586 cruncher.ratio() > best_ratio:
2025-07-01 05:45:03.595 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:03.602 if best_ratio < cutoff:
2025-07-01 05:45:03.613 # no non-identical "pretty close" pair
2025-07-01 05:45:03.624 if eqi is None:
2025-07-01 05:45:03.638 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:03.649 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:03.658 return
2025-07-01 05:45:03.666 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:03.671 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:03.681 else:
2025-07-01 05:45:03.693 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:03.702 eqi = None
2025-07-01 05:45:03.713
2025-07-01 05:45:03.723 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:03.730 # identical
2025-07-01 05:45:03.736
2025-07-01 05:45:03.742 # pump out diffs from before the synch point
2025-07-01 05:45:03.751 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:03.762
2025-07-01 05:45:03.768 # do intraline marking on the synch pair
2025-07-01 05:45:03.774 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:03.779 if eqi is None:
2025-07-01 05:45:03.789 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:03.802 atags = btags = ""
2025-07-01 05:45:03.813 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:03.820 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:03.826 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:03.832 if tag == 'replace':
2025-07-01 05:45:03.838 atags += '^' * la
2025-07-01 05:45:03.844 btags += '^' * lb
2025-07-01 05:45:03.851 elif tag == 'delete':
2025-07-01 05:45:03.858 atags += '-' * la
2025-07-01 05:45:03.868 elif tag == 'insert':
2025-07-01 05:45:03.876 btags += '+' * lb
2025-07-01 05:45:03.884 elif tag == 'equal':
2025-07-01 05:45:03.890 atags += ' ' * la
2025-07-01 05:45:03.894 btags += ' ' * lb
2025-07-01 05:45:03.900 else:
2025-07-01 05:45:03.907 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:03.913 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:03.921 else:
2025-07-01 05:45:03.934 # the synch pair is identical
2025-07-01 05:45:03.946 yield ' ' + aelt
2025-07-01 05:45:03.958
2025-07-01 05:45:03.970 # pump out diffs from after the synch point
2025-07-01 05:45:03.982 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:03.991
2025-07-01 05:45:04.000 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:04.013 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:04.022
2025-07-01 05:45:04.030 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:04.045 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:04.056 alo = 67, ahi = 1101
2025-07-01 05:45:04.065 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:04.072 blo = 67, bhi = 1101
2025-07-01 05:45:04.078
2025-07-01 05:45:04.087 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:04.098 g = []
2025-07-01 05:45:04.110 if alo < ahi:
2025-07-01 05:45:04.122 if blo < bhi:
2025-07-01 05:45:04.132 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:04.143 else:
2025-07-01 05:45:04.151 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:04.161 elif blo < bhi:
2025-07-01 05:45:04.174 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:04.186
2025-07-01 05:45:04.198 > yield from g
2025-07-01 05:45:04.208
2025-07-01 05:45:04.215 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:04.221 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:04.227
2025-07-01 05:45:04.233 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:04.242 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:04.251 alo = 67, ahi = 1101
2025-07-01 05:45:04.260 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:04.266 blo = 67, bhi = 1101
2025-07-01 05:45:04.272
2025-07-01 05:45:04.278 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:04.283 r"""
2025-07-01 05:45:04.289 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:04.295 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:04.303 synch point, and intraline difference marking is done on the
2025-07-01 05:45:04.314 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:04.326
2025-07-01 05:45:04.334 Example:
2025-07-01 05:45:04.346
2025-07-01 05:45:04.356 >>> d = Differ()
2025-07-01 05:45:04.364 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:04.370 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:04.377 >>> print(''.join(results), end="")
2025-07-01 05:45:04.385 - abcDefghiJkl
2025-07-01 05:45:04.405 + abcdefGhijkl
2025-07-01 05:45:04.423 """
2025-07-01 05:45:04.430
2025-07-01 05:45:04.436 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:04.446 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:04.454 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:04.462 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:04.470 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:04.477
2025-07-01 05:45:04.482 # search for the pair that matches best without being identical
2025-07-01 05:45:04.488 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:04.494 # on junk -- unless we have to)
2025-07-01 05:45:04.500 for j in range(blo, bhi):
2025-07-01 05:45:04.506 bj = b[j]
2025-07-01 05:45:04.516 cruncher.set_seq2(bj)
2025-07-01 05:45:04.523 for i in range(alo, ahi):
2025-07-01 05:45:04.529 ai = a[i]
2025-07-01 05:45:04.533 if ai == bj:
2025-07-01 05:45:04.538 if eqi is None:
2025-07-01 05:45:04.543 eqi, eqj = i, j
2025-07-01 05:45:04.548 continue
2025-07-01 05:45:04.552 cruncher.set_seq1(ai)
2025-07-01 05:45:04.558 # computing similarity is expensive, so use the quick
2025-07-01 05:45:04.564 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:04.571 # compares by a factor of 3.
2025-07-01 05:45:04.577 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:04.585 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:04.591 # of the computation is cached by cruncher
2025-07-01 05:45:04.599 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:04.605 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:04.611 cruncher.ratio() > best_ratio:
2025-07-01 05:45:04.617 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:04.622 if best_ratio < cutoff:
2025-07-01 05:45:04.627 # no non-identical "pretty close" pair
2025-07-01 05:45:04.632 if eqi is None:
2025-07-01 05:45:04.637 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:04.641 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:04.646 return
2025-07-01 05:45:04.651 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:04.656 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:04.661 else:
2025-07-01 05:45:04.673 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:04.682 eqi = None
2025-07-01 05:45:04.691
2025-07-01 05:45:04.704 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:04.717 # identical
2025-07-01 05:45:04.730
2025-07-01 05:45:04.742 # pump out diffs from before the synch point
2025-07-01 05:45:04.753 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:04.762
2025-07-01 05:45:04.772 # do intraline marking on the synch pair
2025-07-01 05:45:04.784 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:04.794 if eqi is None:
2025-07-01 05:45:04.803 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:04.815 atags = btags = ""
2025-07-01 05:45:04.824 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:04.831 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:04.838 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:04.848 if tag == 'replace':
2025-07-01 05:45:04.863 atags += '^' * la
2025-07-01 05:45:04.872 btags += '^' * lb
2025-07-01 05:45:04.880 elif tag == 'delete':
2025-07-01 05:45:04.888 atags += '-' * la
2025-07-01 05:45:04.895 elif tag == 'insert':
2025-07-01 05:45:04.902 btags += '+' * lb
2025-07-01 05:45:04.908 elif tag == 'equal':
2025-07-01 05:45:04.914 atags += ' ' * la
2025-07-01 05:45:04.922 btags += ' ' * lb
2025-07-01 05:45:04.930 else:
2025-07-01 05:45:04.938 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:04.945 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:04.950 else:
2025-07-01 05:45:04.958 # the synch pair is identical
2025-07-01 05:45:04.970 yield ' ' + aelt
2025-07-01 05:45:04.979
2025-07-01 05:45:04.988 # pump out diffs from after the synch point
2025-07-01 05:45:04.995 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:05.005
2025-07-01 05:45:05.019 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:05.030 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:05.039
2025-07-01 05:45:05.047 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:05.055 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:05.062 alo = 70, ahi = 1101
2025-07-01 05:45:05.068 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:05.075 blo = 70, bhi = 1101
2025-07-01 05:45:05.084
2025-07-01 05:45:05.097 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:05.107 g = []
2025-07-01 05:45:05.117 if alo < ahi:
2025-07-01 05:45:05.126 if blo < bhi:
2025-07-01 05:45:05.133 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:05.139 else:
2025-07-01 05:45:05.149 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:05.156 elif blo < bhi:
2025-07-01 05:45:05.162 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:05.168
2025-07-01 05:45:05.174 > yield from g
2025-07-01 05:45:05.181
2025-07-01 05:45:05.191 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:05.202 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:05.210
2025-07-01 05:45:05.216 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:05.224 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:05.236 alo = 70, ahi = 1101
2025-07-01 05:45:05.246 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:05.253 blo = 70, bhi = 1101
2025-07-01 05:45:05.260
2025-07-01 05:45:05.266 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:05.272 r"""
2025-07-01 05:45:05.278 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:05.284 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:05.289 synch point, and intraline difference marking is done on the
2025-07-01 05:45:05.294 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:05.299
2025-07-01 05:45:05.305 Example:
2025-07-01 05:45:05.314
2025-07-01 05:45:05.320 >>> d = Differ()
2025-07-01 05:45:05.326 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:05.339 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:05.349 >>> print(''.join(results), end="")
2025-07-01 05:45:05.358 - abcDefghiJkl
2025-07-01 05:45:05.373 + abcdefGhijkl
2025-07-01 05:45:05.383 """
2025-07-01 05:45:05.387
2025-07-01 05:45:05.392 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:05.402 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:05.410 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:05.417 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:05.424 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:05.430
2025-07-01 05:45:05.437 # search for the pair that matches best without being identical
2025-07-01 05:45:05.443 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:05.450 # on junk -- unless we have to)
2025-07-01 05:45:05.454 for j in range(blo, bhi):
2025-07-01 05:45:05.459 bj = b[j]
2025-07-01 05:45:05.463 cruncher.set_seq2(bj)
2025-07-01 05:45:05.467 for i in range(alo, ahi):
2025-07-01 05:45:05.472 ai = a[i]
2025-07-01 05:45:05.476 if ai == bj:
2025-07-01 05:45:05.480 if eqi is None:
2025-07-01 05:45:05.485 eqi, eqj = i, j
2025-07-01 05:45:05.489 continue
2025-07-01 05:45:05.494 cruncher.set_seq1(ai)
2025-07-01 05:45:05.498 # computing similarity is expensive, so use the quick
2025-07-01 05:45:05.503 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:05.507 # compares by a factor of 3.
2025-07-01 05:45:05.513 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:05.518 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:05.524 # of the computation is cached by cruncher
2025-07-01 05:45:05.531 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:05.538 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:05.550 cruncher.ratio() > best_ratio:
2025-07-01 05:45:05.557 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:05.563 if best_ratio < cutoff:
2025-07-01 05:45:05.569 # no non-identical "pretty close" pair
2025-07-01 05:45:05.576 if eqi is None:
2025-07-01 05:45:05.583 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:05.591 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:05.599 return
2025-07-01 05:45:05.610 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:05.621 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:05.628 else:
2025-07-01 05:45:05.634 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:05.641 eqi = None
2025-07-01 05:45:05.647
2025-07-01 05:45:05.658 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:05.669 # identical
2025-07-01 05:45:05.677
2025-07-01 05:45:05.684 # pump out diffs from before the synch point
2025-07-01 05:45:05.691 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:05.699
2025-07-01 05:45:05.711 # do intraline marking on the synch pair
2025-07-01 05:45:05.719 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:05.730 if eqi is None:
2025-07-01 05:45:05.738 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:05.746 atags = btags = ""
2025-07-01 05:45:05.753 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:05.765 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:05.775 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:05.782 if tag == 'replace':
2025-07-01 05:45:05.788 atags += '^' * la
2025-07-01 05:45:05.795 btags += '^' * lb
2025-07-01 05:45:05.807 elif tag == 'delete':
2025-07-01 05:45:05.821 atags += '-' * la
2025-07-01 05:45:05.833 elif tag == 'insert':
2025-07-01 05:45:05.845 btags += '+' * lb
2025-07-01 05:45:05.856 elif tag == 'equal':
2025-07-01 05:45:05.864 atags += ' ' * la
2025-07-01 05:45:05.871 btags += ' ' * lb
2025-07-01 05:45:05.877 else:
2025-07-01 05:45:05.887 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:05.895 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:05.904 else:
2025-07-01 05:45:05.911 # the synch pair is identical
2025-07-01 05:45:05.919 yield ' ' + aelt
2025-07-01 05:45:05.932
2025-07-01 05:45:05.945 # pump out diffs from after the synch point
2025-07-01 05:45:05.954 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:05.964
2025-07-01 05:45:05.975 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:05.988 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:05.997
2025-07-01 05:45:06.005 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:06.013 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:06.020 alo = 71, ahi = 1101
2025-07-01 05:45:06.028 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:06.034 blo = 71, bhi = 1101
2025-07-01 05:45:06.041
2025-07-01 05:45:06.051 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:06.062 g = []
2025-07-01 05:45:06.076 if alo < ahi:
2025-07-01 05:45:06.084 if blo < bhi:
2025-07-01 05:45:06.092 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:06.098 else:
2025-07-01 05:45:06.107 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:06.118 elif blo < bhi:
2025-07-01 05:45:06.127 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:06.133
2025-07-01 05:45:06.140 > yield from g
2025-07-01 05:45:06.145
2025-07-01 05:45:06.151 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:06.162 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:06.172
2025-07-01 05:45:06.181 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:06.187 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:06.193 alo = 71, ahi = 1101
2025-07-01 05:45:06.199 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:06.204 blo = 71, bhi = 1101
2025-07-01 05:45:06.209
2025-07-01 05:45:06.216 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:06.222 r"""
2025-07-01 05:45:06.229 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:06.235 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:06.242 synch point, and intraline difference marking is done on the
2025-07-01 05:45:06.248 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:06.254
2025-07-01 05:45:06.259 Example:
2025-07-01 05:45:06.265
2025-07-01 05:45:06.270 >>> d = Differ()
2025-07-01 05:45:06.278 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:06.289 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:06.297 >>> print(''.join(results), end="")
2025-07-01 05:45:06.304 - abcDefghiJkl
2025-07-01 05:45:06.317 + abcdefGhijkl
2025-07-01 05:45:06.330 """
2025-07-01 05:45:06.339
2025-07-01 05:45:06.350 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:06.358 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:06.365 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:06.371 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:06.376 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:06.381
2025-07-01 05:45:06.389 # search for the pair that matches best without being identical
2025-07-01 05:45:06.399 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:06.411 # on junk -- unless we have to)
2025-07-01 05:45:06.421 for j in range(blo, bhi):
2025-07-01 05:45:06.429 bj = b[j]
2025-07-01 05:45:06.437 cruncher.set_seq2(bj)
2025-07-01 05:45:06.443 for i in range(alo, ahi):
2025-07-01 05:45:06.450 ai = a[i]
2025-07-01 05:45:06.456 if ai == bj:
2025-07-01 05:45:06.463 if eqi is None:
2025-07-01 05:45:06.471 eqi, eqj = i, j
2025-07-01 05:45:06.478 continue
2025-07-01 05:45:06.485 cruncher.set_seq1(ai)
2025-07-01 05:45:06.492 # computing similarity is expensive, so use the quick
2025-07-01 05:45:06.499 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:06.507 # compares by a factor of 3.
2025-07-01 05:45:06.521 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:06.531 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:06.538 # of the computation is cached by cruncher
2025-07-01 05:45:06.545 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:06.551 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:06.558 cruncher.ratio() > best_ratio:
2025-07-01 05:45:06.566 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:06.572 if best_ratio < cutoff:
2025-07-01 05:45:06.580 # no non-identical "pretty close" pair
2025-07-01 05:45:06.587 if eqi is None:
2025-07-01 05:45:06.594 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:06.607 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:06.617 return
2025-07-01 05:45:06.625 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:06.632 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:06.638 else:
2025-07-01 05:45:06.645 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:06.652 eqi = None
2025-07-01 05:45:06.659
2025-07-01 05:45:06.668 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:06.680 # identical
2025-07-01 05:45:06.689
2025-07-01 05:45:06.696 # pump out diffs from before the synch point
2025-07-01 05:45:06.703 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:06.709
2025-07-01 05:45:06.715 # do intraline marking on the synch pair
2025-07-01 05:45:06.720 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:06.726 if eqi is None:
2025-07-01 05:45:06.737 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:06.747 atags = btags = ""
2025-07-01 05:45:06.755 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:06.760 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:06.765 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:06.776 if tag == 'replace':
2025-07-01 05:45:06.785 atags += '^' * la
2025-07-01 05:45:06.792 btags += '^' * lb
2025-07-01 05:45:06.798 elif tag == 'delete':
2025-07-01 05:45:06.803 atags += '-' * la
2025-07-01 05:45:06.810 elif tag == 'insert':
2025-07-01 05:45:06.821 btags += '+' * lb
2025-07-01 05:45:06.830 elif tag == 'equal':
2025-07-01 05:45:06.838 atags += ' ' * la
2025-07-01 05:45:06.846 btags += ' ' * lb
2025-07-01 05:45:06.855 else:
2025-07-01 05:45:06.862 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:06.869 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:06.876 else:
2025-07-01 05:45:06.882 # the synch pair is identical
2025-07-01 05:45:06.889 yield ' ' + aelt
2025-07-01 05:45:06.894
2025-07-01 05:45:06.899 # pump out diffs from after the synch point
2025-07-01 05:45:06.903 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:06.908
2025-07-01 05:45:06.918 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:06.925 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:06.931
2025-07-01 05:45:06.938 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:06.946 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:06.952 alo = 72, ahi = 1101
2025-07-01 05:45:06.958 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:06.967 blo = 72, bhi = 1101
2025-07-01 05:45:06.978
2025-07-01 05:45:06.984 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:06.990 g = []
2025-07-01 05:45:06.996 if alo < ahi:
2025-07-01 05:45:07.005 if blo < bhi:
2025-07-01 05:45:07.012 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:07.019 else:
2025-07-01 05:45:07.026 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:07.032 elif blo < bhi:
2025-07-01 05:45:07.038 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:07.045
2025-07-01 05:45:07.051 > yield from g
2025-07-01 05:45:07.057
2025-07-01 05:45:07.063 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:07.068 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:07.073
2025-07-01 05:45:07.078 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:07.084 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:07.088 alo = 72, ahi = 1101
2025-07-01 05:45:07.099 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:07.107 blo = 72, bhi = 1101
2025-07-01 05:45:07.113
2025-07-01 05:45:07.119 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:07.126 r"""
2025-07-01 05:45:07.133 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:07.140 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:07.148 synch point, and intraline difference marking is done on the
2025-07-01 05:45:07.154 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:07.159
2025-07-01 05:45:07.165 Example:
2025-07-01 05:45:07.170
2025-07-01 05:45:07.181 >>> d = Differ()
2025-07-01 05:45:07.191 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:07.200 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:07.209 >>> print(''.join(results), end="")
2025-07-01 05:45:07.221 - abcDefghiJkl
2025-07-01 05:45:07.238 + abcdefGhijkl
2025-07-01 05:45:07.258 """
2025-07-01 05:45:07.266
2025-07-01 05:45:07.275 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:07.286 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:07.295 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:07.304 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:07.314 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:07.323
2025-07-01 05:45:07.333 # search for the pair that matches best without being identical
2025-07-01 05:45:07.344 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:07.356 # on junk -- unless we have to)
2025-07-01 05:45:07.367 for j in range(blo, bhi):
2025-07-01 05:45:07.378 bj = b[j]
2025-07-01 05:45:07.387 cruncher.set_seq2(bj)
2025-07-01 05:45:07.396 for i in range(alo, ahi):
2025-07-01 05:45:07.403 ai = a[i]
2025-07-01 05:45:07.409 if ai == bj:
2025-07-01 05:45:07.415 if eqi is None:
2025-07-01 05:45:07.421 eqi, eqj = i, j
2025-07-01 05:45:07.426 continue
2025-07-01 05:45:07.433 cruncher.set_seq1(ai)
2025-07-01 05:45:07.439 # computing similarity is expensive, so use the quick
2025-07-01 05:45:07.446 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:07.456 # compares by a factor of 3.
2025-07-01 05:45:07.468 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:07.477 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:07.489 # of the computation is cached by cruncher
2025-07-01 05:45:07.499 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:07.507 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:07.520 cruncher.ratio() > best_ratio:
2025-07-01 05:45:07.531 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:07.539 if best_ratio < cutoff:
2025-07-01 05:45:07.547 # no non-identical "pretty close" pair
2025-07-01 05:45:07.554 if eqi is None:
2025-07-01 05:45:07.559 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:07.564 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:07.569 return
2025-07-01 05:45:07.575 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:07.581 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:07.593 else:
2025-07-01 05:45:07.605 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:07.611 eqi = None
2025-07-01 05:45:07.617
2025-07-01 05:45:07.623 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:07.628 # identical
2025-07-01 05:45:07.634
2025-07-01 05:45:07.640 # pump out diffs from before the synch point
2025-07-01 05:45:07.647 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:07.654
2025-07-01 05:45:07.660 # do intraline marking on the synch pair
2025-07-01 05:45:07.667 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:07.675 if eqi is None:
2025-07-01 05:45:07.684 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:07.690 atags = btags = ""
2025-07-01 05:45:07.697 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:07.705 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:07.711 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:07.717 if tag == 'replace':
2025-07-01 05:45:07.729 atags += '^' * la
2025-07-01 05:45:07.739 btags += '^' * lb
2025-07-01 05:45:07.745 elif tag == 'delete':
2025-07-01 05:45:07.751 atags += '-' * la
2025-07-01 05:45:07.757 elif tag == 'insert':
2025-07-01 05:45:07.769 btags += '+' * lb
2025-07-01 05:45:07.781 elif tag == 'equal':
2025-07-01 05:45:07.792 atags += ' ' * la
2025-07-01 05:45:07.802 btags += ' ' * lb
2025-07-01 05:45:07.809 else:
2025-07-01 05:45:07.816 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:07.823 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:07.829 else:
2025-07-01 05:45:07.834 # the synch pair is identical
2025-07-01 05:45:07.840 yield ' ' + aelt
2025-07-01 05:45:07.846
2025-07-01 05:45:07.852 # pump out diffs from after the synch point
2025-07-01 05:45:07.858 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:07.863
2025-07-01 05:45:07.869 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:07.875 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:07.880
2025-07-01 05:45:07.889 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:07.901 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:07.909 alo = 73, ahi = 1101
2025-07-01 05:45:07.917 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:07.923 blo = 73, bhi = 1101
2025-07-01 05:45:07.929
2025-07-01 05:45:07.934 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:07.938 g = []
2025-07-01 05:45:07.943 if alo < ahi:
2025-07-01 05:45:07.949 if blo < bhi:
2025-07-01 05:45:07.954 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:07.960 else:
2025-07-01 05:45:07.966 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:07.973 elif blo < bhi:
2025-07-01 05:45:07.979 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:07.986
2025-07-01 05:45:07.993 > yield from g
2025-07-01 05:45:08.000
2025-07-01 05:45:08.007 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:08.014 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:08.021
2025-07-01 05:45:08.027 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:08.034 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:08.039 alo = 73, ahi = 1101
2025-07-01 05:45:08.046 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:08.052 blo = 73, bhi = 1101
2025-07-01 05:45:08.058
2025-07-01 05:45:08.064 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:08.070 r"""
2025-07-01 05:45:08.076 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:08.082 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:08.088 synch point, and intraline difference marking is done on the
2025-07-01 05:45:08.094 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:08.100
2025-07-01 05:45:08.112 Example:
2025-07-01 05:45:08.121
2025-07-01 05:45:08.129 >>> d = Differ()
2025-07-01 05:45:08.135 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:08.141 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:08.147 >>> print(''.join(results), end="")
2025-07-01 05:45:08.152 - abcDefghiJkl
2025-07-01 05:45:08.164 + abcdefGhijkl
2025-07-01 05:45:08.176 """
2025-07-01 05:45:08.181
2025-07-01 05:45:08.191 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:08.202 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:08.213 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:08.223 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:08.232 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:08.238
2025-07-01 05:45:08.244 # search for the pair that matches best without being identical
2025-07-01 05:45:08.253 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:08.265 # on junk -- unless we have to)
2025-07-01 05:45:08.273 for j in range(blo, bhi):
2025-07-01 05:45:08.281 bj = b[j]
2025-07-01 05:45:08.287 cruncher.set_seq2(bj)
2025-07-01 05:45:08.295 for i in range(alo, ahi):
2025-07-01 05:45:08.306 ai = a[i]
2025-07-01 05:45:08.314 if ai == bj:
2025-07-01 05:45:08.320 if eqi is None:
2025-07-01 05:45:08.326 eqi, eqj = i, j
2025-07-01 05:45:08.331 continue
2025-07-01 05:45:08.337 cruncher.set_seq1(ai)
2025-07-01 05:45:08.343 # computing similarity is expensive, so use the quick
2025-07-01 05:45:08.350 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:08.361 # compares by a factor of 3.
2025-07-01 05:45:08.369 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:08.376 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:08.383 # of the computation is cached by cruncher
2025-07-01 05:45:08.390 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:08.398 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:08.409 cruncher.ratio() > best_ratio:
2025-07-01 05:45:08.416 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:08.422 if best_ratio < cutoff:
2025-07-01 05:45:08.429 # no non-identical "pretty close" pair
2025-07-01 05:45:08.436 if eqi is None:
2025-07-01 05:45:08.444 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:08.452 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:08.459 return
2025-07-01 05:45:08.466 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:08.476 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:08.488 else:
2025-07-01 05:45:08.498 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:08.504 eqi = None
2025-07-01 05:45:08.510
2025-07-01 05:45:08.515 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:08.520 # identical
2025-07-01 05:45:08.526
2025-07-01 05:45:08.532 # pump out diffs from before the synch point
2025-07-01 05:45:08.538 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:08.544
2025-07-01 05:45:08.550 # do intraline marking on the synch pair
2025-07-01 05:45:08.556 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:08.562 if eqi is None:
2025-07-01 05:45:08.568 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:08.574 atags = btags = ""
2025-07-01 05:45:08.579 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:08.586 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:08.591 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:08.597 if tag == 'replace':
2025-07-01 05:45:08.603 atags += '^' * la
2025-07-01 05:45:08.608 btags += '^' * lb
2025-07-01 05:45:08.614 elif tag == 'delete':
2025-07-01 05:45:08.619 atags += '-' * la
2025-07-01 05:45:08.631 elif tag == 'insert':
2025-07-01 05:45:08.642 btags += '+' * lb
2025-07-01 05:45:08.649 elif tag == 'equal':
2025-07-01 05:45:08.655 atags += ' ' * la
2025-07-01 05:45:08.661 btags += ' ' * lb
2025-07-01 05:45:08.666 else:
2025-07-01 05:45:08.672 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:08.678 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:08.683 else:
2025-07-01 05:45:08.690 # the synch pair is identical
2025-07-01 05:45:08.702 yield ' ' + aelt
2025-07-01 05:45:08.713
2025-07-01 05:45:08.721 # pump out diffs from after the synch point
2025-07-01 05:45:08.728 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:08.733
2025-07-01 05:45:08.739 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:08.754 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:08.761
2025-07-01 05:45:08.767 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:08.772 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:08.778 alo = 74, ahi = 1101
2025-07-01 05:45:08.785 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:08.789 blo = 74, bhi = 1101
2025-07-01 05:45:08.794
2025-07-01 05:45:08.800 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:08.805 g = []
2025-07-01 05:45:08.809 if alo < ahi:
2025-07-01 05:45:08.818 if blo < bhi:
2025-07-01 05:45:08.825 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:08.830 else:
2025-07-01 05:45:08.835 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:08.840 elif blo < bhi:
2025-07-01 05:45:08.845 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:08.850
2025-07-01 05:45:08.855 > yield from g
2025-07-01 05:45:08.861
2025-07-01 05:45:08.866 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:08.876 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:08.886
2025-07-01 05:45:08.893 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:08.900 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:08.906 alo = 74, ahi = 1101
2025-07-01 05:45:08.911 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:08.915 blo = 74, bhi = 1101
2025-07-01 05:45:08.919
2025-07-01 05:45:08.924 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:08.928 r"""
2025-07-01 05:45:08.934 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:08.938 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:08.943 synch point, and intraline difference marking is done on the
2025-07-01 05:45:08.947 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:08.951
2025-07-01 05:45:08.956 Example:
2025-07-01 05:45:08.961
2025-07-01 05:45:08.967 >>> d = Differ()
2025-07-01 05:45:08.973 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:08.977 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:08.982 >>> print(''.join(results), end="")
2025-07-01 05:45:08.986 - abcDefghiJkl
2025-07-01 05:45:08.995 + abcdefGhijkl
2025-07-01 05:45:09.003 """
2025-07-01 05:45:09.015
2025-07-01 05:45:09.022 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:09.033 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:09.043 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:09.051 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:09.058 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:09.063
2025-07-01 05:45:09.068 # search for the pair that matches best without being identical
2025-07-01 05:45:09.072 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:09.076 # on junk -- unless we have to)
2025-07-01 05:45:09.082 for j in range(blo, bhi):
2025-07-01 05:45:09.092 bj = b[j]
2025-07-01 05:45:09.099 cruncher.set_seq2(bj)
2025-07-01 05:45:09.106 for i in range(alo, ahi):
2025-07-01 05:45:09.113 ai = a[i]
2025-07-01 05:45:09.121 if ai == bj:
2025-07-01 05:45:09.129 if eqi is None:
2025-07-01 05:45:09.135 eqi, eqj = i, j
2025-07-01 05:45:09.145 continue
2025-07-01 05:45:09.158 cruncher.set_seq1(ai)
2025-07-01 05:45:09.168 # computing similarity is expensive, so use the quick
2025-07-01 05:45:09.177 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:09.184 # compares by a factor of 3.
2025-07-01 05:45:09.196 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:09.204 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:09.211 # of the computation is cached by cruncher
2025-07-01 05:45:09.217 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:09.223 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:09.230 cruncher.ratio() > best_ratio:
2025-07-01 05:45:09.241 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:09.249 if best_ratio < cutoff:
2025-07-01 05:45:09.256 # no non-identical "pretty close" pair
2025-07-01 05:45:09.262 if eqi is None:
2025-07-01 05:45:09.269 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:09.275 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:09.280 return
2025-07-01 05:45:09.285 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:09.291 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:09.296 else:
2025-07-01 05:45:09.303 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:09.311 eqi = None
2025-07-01 05:45:09.318
2025-07-01 05:45:09.330 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:09.340 # identical
2025-07-01 05:45:09.352
2025-07-01 05:45:09.361 # pump out diffs from before the synch point
2025-07-01 05:45:09.371 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:09.380
2025-07-01 05:45:09.392 # do intraline marking on the synch pair
2025-07-01 05:45:09.400 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:09.407 if eqi is None:
2025-07-01 05:45:09.413 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:09.419 atags = btags = ""
2025-07-01 05:45:09.427 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:09.438 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:09.450 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:09.462 if tag == 'replace':
2025-07-01 05:45:09.472 atags += '^' * la
2025-07-01 05:45:09.480 btags += '^' * lb
2025-07-01 05:45:09.486 elif tag == 'delete':
2025-07-01 05:45:09.493 atags += '-' * la
2025-07-01 05:45:09.499 elif tag == 'insert':
2025-07-01 05:45:09.506 btags += '+' * lb
2025-07-01 05:45:09.517 elif tag == 'equal':
2025-07-01 05:45:09.526 atags += ' ' * la
2025-07-01 05:45:09.538 btags += ' ' * lb
2025-07-01 05:45:09.549 else:
2025-07-01 05:45:09.560 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:09.567 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:09.574 else:
2025-07-01 05:45:09.581 # the synch pair is identical
2025-07-01 05:45:09.589 yield ' ' + aelt
2025-07-01 05:45:09.600
2025-07-01 05:45:09.607 # pump out diffs from after the synch point
2025-07-01 05:45:09.613 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:09.619
2025-07-01 05:45:09.627 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:09.638 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:09.650
2025-07-01 05:45:09.660 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:09.667 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:09.674 alo = 75, ahi = 1101
2025-07-01 05:45:09.688 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:09.695 blo = 75, bhi = 1101
2025-07-01 05:45:09.702
2025-07-01 05:45:09.714 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:09.722 g = []
2025-07-01 05:45:09.728 if alo < ahi:
2025-07-01 05:45:09.734 if blo < bhi:
2025-07-01 05:45:09.740 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:09.746 else:
2025-07-01 05:45:09.758 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:09.768 elif blo < bhi:
2025-07-01 05:45:09.775 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:09.780
2025-07-01 05:45:09.785 > yield from g
2025-07-01 05:45:09.790
2025-07-01 05:45:09.794 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:09.799 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:09.805
2025-07-01 05:45:09.811 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:09.819 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:09.825 alo = 75, ahi = 1101
2025-07-01 05:45:09.833 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:09.839 blo = 75, bhi = 1101
2025-07-01 05:45:09.845
2025-07-01 05:45:09.850 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:09.856 r"""
2025-07-01 05:45:09.864 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:09.871 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:09.878 synch point, and intraline difference marking is done on the
2025-07-01 05:45:09.884 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:09.891
2025-07-01 05:45:09.899 Example:
2025-07-01 05:45:09.911
2025-07-01 05:45:09.920 >>> d = Differ()
2025-07-01 05:45:09.927 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:09.933 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:09.939 >>> print(''.join(results), end="")
2025-07-01 05:45:09.945 - abcDefghiJkl
2025-07-01 05:45:09.962 + abcdefGhijkl
2025-07-01 05:45:09.979 """
2025-07-01 05:45:09.985
2025-07-01 05:45:09.994 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:10.005 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:10.014 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:10.022 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:10.028 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:10.033
2025-07-01 05:45:10.039 # search for the pair that matches best without being identical
2025-07-01 05:45:10.044 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:10.051 # on junk -- unless we have to)
2025-07-01 05:45:10.056 for j in range(blo, bhi):
2025-07-01 05:45:10.062 bj = b[j]
2025-07-01 05:45:10.071 cruncher.set_seq2(bj)
2025-07-01 05:45:10.080 for i in range(alo, ahi):
2025-07-01 05:45:10.088 ai = a[i]
2025-07-01 05:45:10.095 if ai == bj:
2025-07-01 05:45:10.107 if eqi is None:
2025-07-01 05:45:10.116 eqi, eqj = i, j
2025-07-01 05:45:10.129 continue
2025-07-01 05:45:10.140 cruncher.set_seq1(ai)
2025-07-01 05:45:10.152 # computing similarity is expensive, so use the quick
2025-07-01 05:45:10.161 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:10.168 # compares by a factor of 3.
2025-07-01 05:45:10.175 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:10.181 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:10.187 # of the computation is cached by cruncher
2025-07-01 05:45:10.193 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:10.200 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:10.207 cruncher.ratio() > best_ratio:
2025-07-01 05:45:10.219 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:10.227 if best_ratio < cutoff:
2025-07-01 05:45:10.233 # no non-identical "pretty close" pair
2025-07-01 05:45:10.239 if eqi is None:
2025-07-01 05:45:10.243 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:10.253 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:10.262 return
2025-07-01 05:45:10.270 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:10.279 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:10.291 else:
2025-07-01 05:45:10.300 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:10.308 eqi = None
2025-07-01 05:45:10.315
2025-07-01 05:45:10.322 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:10.331 # identical
2025-07-01 05:45:10.338
2025-07-01 05:45:10.343 # pump out diffs from before the synch point
2025-07-01 05:45:10.348 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:10.353
2025-07-01 05:45:10.358 # do intraline marking on the synch pair
2025-07-01 05:45:10.368 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:10.378 if eqi is None:
2025-07-01 05:45:10.386 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:10.395 atags = btags = ""
2025-07-01 05:45:10.406 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:10.418 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:10.430 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:10.443 if tag == 'replace':
2025-07-01 05:45:10.453 atags += '^' * la
2025-07-01 05:45:10.462 btags += '^' * lb
2025-07-01 05:45:10.471 elif tag == 'delete':
2025-07-01 05:45:10.477 atags += '-' * la
2025-07-01 05:45:10.488 elif tag == 'insert':
2025-07-01 05:45:10.499 btags += '+' * lb
2025-07-01 05:45:10.507 elif tag == 'equal':
2025-07-01 05:45:10.514 atags += ' ' * la
2025-07-01 05:45:10.521 btags += ' ' * lb
2025-07-01 05:45:10.526 else:
2025-07-01 05:45:10.533 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:10.538 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:10.550 else:
2025-07-01 05:45:10.562 # the synch pair is identical
2025-07-01 05:45:10.570 yield ' ' + aelt
2025-07-01 05:45:10.577
2025-07-01 05:45:10.584 # pump out diffs from after the synch point
2025-07-01 05:45:10.591 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:10.599
2025-07-01 05:45:10.609 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:10.622 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:10.631
2025-07-01 05:45:10.640 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:10.653 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:10.663 alo = 76, ahi = 1101
2025-07-01 05:45:10.671 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:10.682 blo = 76, bhi = 1101
2025-07-01 05:45:10.693
2025-07-01 05:45:10.703 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:10.710 g = []
2025-07-01 05:45:10.717 if alo < ahi:
2025-07-01 05:45:10.724 if blo < bhi:
2025-07-01 05:45:10.730 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:10.738 else:
2025-07-01 05:45:10.746 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:10.753 elif blo < bhi:
2025-07-01 05:45:10.761 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:10.768
2025-07-01 05:45:10.774 > yield from g
2025-07-01 05:45:10.779
2025-07-01 05:45:10.783 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:10.788 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:10.792
2025-07-01 05:45:10.798 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:10.806 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:10.813 alo = 76, ahi = 1101
2025-07-01 05:45:10.826 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:10.835 blo = 76, bhi = 1101
2025-07-01 05:45:10.847
2025-07-01 05:45:10.861 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:10.871 r"""
2025-07-01 05:45:10.884 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:10.898 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:10.908 synch point, and intraline difference marking is done on the
2025-07-01 05:45:10.914 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:10.923
2025-07-01 05:45:10.934 Example:
2025-07-01 05:45:10.943
2025-07-01 05:45:10.952 >>> d = Differ()
2025-07-01 05:45:10.961 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:10.969 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:10.980 >>> print(''.join(results), end="")
2025-07-01 05:45:10.989 - abcDefghiJkl
2025-07-01 05:45:11.009 + abcdefGhijkl
2025-07-01 05:45:11.028 """
2025-07-01 05:45:11.036
2025-07-01 05:45:11.043 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:11.050 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:11.058 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:11.069 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:11.081 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:11.089
2025-07-01 05:45:11.097 # search for the pair that matches best without being identical
2025-07-01 05:45:11.104 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:11.110 # on junk -- unless we have to)
2025-07-01 05:45:11.116 for j in range(blo, bhi):
2025-07-01 05:45:11.122 bj = b[j]
2025-07-01 05:45:11.133 cruncher.set_seq2(bj)
2025-07-01 05:45:11.143 for i in range(alo, ahi):
2025-07-01 05:45:11.152 ai = a[i]
2025-07-01 05:45:11.163 if ai == bj:
2025-07-01 05:45:11.176 if eqi is None:
2025-07-01 05:45:11.185 eqi, eqj = i, j
2025-07-01 05:45:11.192 continue
2025-07-01 05:45:11.198 cruncher.set_seq1(ai)
2025-07-01 05:45:11.204 # computing similarity is expensive, so use the quick
2025-07-01 05:45:11.211 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:11.216 # compares by a factor of 3.
2025-07-01 05:45:11.221 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:11.226 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:11.231 # of the computation is cached by cruncher
2025-07-01 05:45:11.237 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:11.246 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:11.256 cruncher.ratio() > best_ratio:
2025-07-01 05:45:11.271 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:11.279 if best_ratio < cutoff:
2025-07-01 05:45:11.286 # no non-identical "pretty close" pair
2025-07-01 05:45:11.295 if eqi is None:
2025-07-01 05:45:11.305 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:11.314 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:11.321 return
2025-07-01 05:45:11.327 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:11.335 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:11.345 else:
2025-07-01 05:45:11.355 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:11.364 eqi = None
2025-07-01 05:45:11.371
2025-07-01 05:45:11.378 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:11.390 # identical
2025-07-01 05:45:11.400
2025-07-01 05:45:11.411 # pump out diffs from before the synch point
2025-07-01 05:45:11.421 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:11.429
2025-07-01 05:45:11.437 # do intraline marking on the synch pair
2025-07-01 05:45:11.444 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:11.451 if eqi is None:
2025-07-01 05:45:11.458 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:11.466 atags = btags = ""
2025-07-01 05:45:11.475 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:11.484 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:11.490 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:11.495 if tag == 'replace':
2025-07-01 05:45:11.500 atags += '^' * la
2025-07-01 05:45:11.505 btags += '^' * lb
2025-07-01 05:45:11.510 elif tag == 'delete':
2025-07-01 05:45:11.516 atags += '-' * la
2025-07-01 05:45:11.523 elif tag == 'insert':
2025-07-01 05:45:11.530 btags += '+' * lb
2025-07-01 05:45:11.538 elif tag == 'equal':
2025-07-01 05:45:11.548 atags += ' ' * la
2025-07-01 05:45:11.558 btags += ' ' * lb
2025-07-01 05:45:11.566 else:
2025-07-01 05:45:11.573 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:11.580 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:11.587 else:
2025-07-01 05:45:11.593 # the synch pair is identical
2025-07-01 05:45:11.600 yield ' ' + aelt
2025-07-01 05:45:11.605
2025-07-01 05:45:11.612 # pump out diffs from after the synch point
2025-07-01 05:45:11.618 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:11.627
2025-07-01 05:45:11.637 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:11.644 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:11.651
2025-07-01 05:45:11.657 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:11.664 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:11.670 alo = 77, ahi = 1101
2025-07-01 05:45:11.677 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:11.684 blo = 77, bhi = 1101
2025-07-01 05:45:11.690
2025-07-01 05:45:11.697 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:11.707 g = []
2025-07-01 05:45:11.717 if alo < ahi:
2025-07-01 05:45:11.725 if blo < bhi:
2025-07-01 05:45:11.733 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:11.741 else:
2025-07-01 05:45:11.748 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:11.755 elif blo < bhi:
2025-07-01 05:45:11.762 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:11.771
2025-07-01 05:45:11.782 > yield from g
2025-07-01 05:45:11.789
2025-07-01 05:45:11.795 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:11.801 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:11.813
2025-07-01 05:45:11.825 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:11.836 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:11.845 alo = 77, ahi = 1101
2025-07-01 05:45:11.858 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:11.867 blo = 77, bhi = 1101
2025-07-01 05:45:11.875
2025-07-01 05:45:11.883 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:11.894 r"""
2025-07-01 05:45:11.903 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:11.911 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:11.917 synch point, and intraline difference marking is done on the
2025-07-01 05:45:11.929 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:11.938
2025-07-01 05:45:11.946 Example:
2025-07-01 05:45:11.954
2025-07-01 05:45:11.964 >>> d = Differ()
2025-07-01 05:45:11.975 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:11.983 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:11.989 >>> print(''.join(results), end="")
2025-07-01 05:45:11.995 - abcDefghiJkl
2025-07-01 05:45:12.008 + abcdefGhijkl
2025-07-01 05:45:12.023 """
2025-07-01 05:45:12.033
2025-07-01 05:45:12.046 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:12.054 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:12.060 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:12.066 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:12.072 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:12.077
2025-07-01 05:45:12.082 # search for the pair that matches best without being identical
2025-07-01 05:45:12.088 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:12.094 # on junk -- unless we have to)
2025-07-01 05:45:12.100 for j in range(blo, bhi):
2025-07-01 05:45:12.106 bj = b[j]
2025-07-01 05:45:12.114 cruncher.set_seq2(bj)
2025-07-01 05:45:12.122 for i in range(alo, ahi):
2025-07-01 05:45:12.129 ai = a[i]
2025-07-01 05:45:12.135 if ai == bj:
2025-07-01 05:45:12.142 if eqi is None:
2025-07-01 05:45:12.151 eqi, eqj = i, j
2025-07-01 05:45:12.161 continue
2025-07-01 05:45:12.168 cruncher.set_seq1(ai)
2025-07-01 05:45:12.174 # computing similarity is expensive, so use the quick
2025-07-01 05:45:12.180 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:12.186 # compares by a factor of 3.
2025-07-01 05:45:12.192 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:12.198 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:12.210 # of the computation is cached by cruncher
2025-07-01 05:45:12.219 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:12.226 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:12.232 cruncher.ratio() > best_ratio:
2025-07-01 05:45:12.238 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:12.251 if best_ratio < cutoff:
2025-07-01 05:45:12.261 # no non-identical "pretty close" pair
2025-07-01 05:45:12.273 if eqi is None:
2025-07-01 05:45:12.283 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:12.291 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:12.299 return
2025-07-01 05:45:12.308 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:12.321 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:12.331 else:
2025-07-01 05:45:12.342 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:12.349 eqi = None
2025-07-01 05:45:12.355
2025-07-01 05:45:12.362 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:12.368 # identical
2025-07-01 05:45:12.376
2025-07-01 05:45:12.389 # pump out diffs from before the synch point
2025-07-01 05:45:12.397 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:12.409
2025-07-01 05:45:12.422 # do intraline marking on the synch pair
2025-07-01 05:45:12.433 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:12.441 if eqi is None:
2025-07-01 05:45:12.453 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:12.463 atags = btags = ""
2025-07-01 05:45:12.475 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:12.486 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:12.493 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:12.500 if tag == 'replace':
2025-07-01 05:45:12.506 atags += '^' * la
2025-07-01 05:45:12.512 btags += '^' * lb
2025-07-01 05:45:12.519 elif tag == 'delete':
2025-07-01 05:45:12.530 atags += '-' * la
2025-07-01 05:45:12.539 elif tag == 'insert':
2025-07-01 05:45:12.547 btags += '+' * lb
2025-07-01 05:45:12.560 elif tag == 'equal':
2025-07-01 05:45:12.572 atags += ' ' * la
2025-07-01 05:45:12.582 btags += ' ' * lb
2025-07-01 05:45:12.592 else:
2025-07-01 05:45:12.598 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:12.604 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:12.610 else:
2025-07-01 05:45:12.616 # the synch pair is identical
2025-07-01 05:45:12.622 yield ' ' + aelt
2025-07-01 05:45:12.633
2025-07-01 05:45:12.647 # pump out diffs from after the synch point
2025-07-01 05:45:12.659 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:12.672
2025-07-01 05:45:12.685 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:12.694 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:12.701
2025-07-01 05:45:12.707 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:12.713 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:12.718 alo = 78, ahi = 1101
2025-07-01 05:45:12.727 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:12.732 blo = 78, bhi = 1101
2025-07-01 05:45:12.737
2025-07-01 05:45:12.743 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:12.753 g = []
2025-07-01 05:45:12.760 if alo < ahi:
2025-07-01 05:45:12.768 if blo < bhi:
2025-07-01 05:45:12.774 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:12.779 else:
2025-07-01 05:45:12.783 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:12.788 elif blo < bhi:
2025-07-01 05:45:12.795 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:12.801
2025-07-01 05:45:12.807 > yield from g
2025-07-01 05:45:12.812
2025-07-01 05:45:12.818 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:12.827 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:12.835
2025-07-01 05:45:12.848 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:12.858 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:12.864 alo = 78, ahi = 1101
2025-07-01 05:45:12.871 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:12.882 blo = 78, bhi = 1101
2025-07-01 05:45:12.894
2025-07-01 05:45:12.905 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:12.912 r"""
2025-07-01 05:45:12.920 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:12.925 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:12.933 synch point, and intraline difference marking is done on the
2025-07-01 05:45:12.941 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:12.948
2025-07-01 05:45:12.956 Example:
2025-07-01 05:45:12.962
2025-07-01 05:45:12.968 >>> d = Differ()
2025-07-01 05:45:12.974 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:12.979 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:12.985 >>> print(''.join(results), end="")
2025-07-01 05:45:12.990 - abcDefghiJkl
2025-07-01 05:45:13.002 + abcdefGhijkl
2025-07-01 05:45:13.015 """
2025-07-01 05:45:13.021
2025-07-01 05:45:13.027 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:13.033 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:13.038 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:13.043 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:13.051 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:13.061
2025-07-01 05:45:13.069 # search for the pair that matches best without being identical
2025-07-01 05:45:13.079 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:13.089 # on junk -- unless we have to)
2025-07-01 05:45:13.102 for j in range(blo, bhi):
2025-07-01 05:45:13.113 bj = b[j]
2025-07-01 05:45:13.121 cruncher.set_seq2(bj)
2025-07-01 05:45:13.128 for i in range(alo, ahi):
2025-07-01 05:45:13.137 ai = a[i]
2025-07-01 05:45:13.148 if ai == bj:
2025-07-01 05:45:13.159 if eqi is None:
2025-07-01 05:45:13.172 eqi, eqj = i, j
2025-07-01 05:45:13.183 continue
2025-07-01 05:45:13.195 cruncher.set_seq1(ai)
2025-07-01 05:45:13.208 # computing similarity is expensive, so use the quick
2025-07-01 05:45:13.218 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:13.229 # compares by a factor of 3.
2025-07-01 05:45:13.238 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:13.247 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:13.254 # of the computation is cached by cruncher
2025-07-01 05:45:13.260 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:13.266 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:13.275 cruncher.ratio() > best_ratio:
2025-07-01 05:45:13.287 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:13.297 if best_ratio < cutoff:
2025-07-01 05:45:13.309 # no non-identical "pretty close" pair
2025-07-01 05:45:13.322 if eqi is None:
2025-07-01 05:45:13.333 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:13.341 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:13.349 return
2025-07-01 05:45:13.355 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:13.362 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:13.367 else:
2025-07-01 05:45:13.373 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:13.379 eqi = None
2025-07-01 05:45:13.386
2025-07-01 05:45:13.395 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:13.402 # identical
2025-07-01 05:45:13.408
2025-07-01 05:45:13.414 # pump out diffs from before the synch point
2025-07-01 05:45:13.420 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:13.428
2025-07-01 05:45:13.440 # do intraline marking on the synch pair
2025-07-01 05:45:13.453 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:13.465 if eqi is None:
2025-07-01 05:45:13.476 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:13.485 atags = btags = ""
2025-07-01 05:45:13.492 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:13.498 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:13.503 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:13.508 if tag == 'replace':
2025-07-01 05:45:13.514 atags += '^' * la
2025-07-01 05:45:13.519 btags += '^' * lb
2025-07-01 05:45:13.524 elif tag == 'delete':
2025-07-01 05:45:13.530 atags += '-' * la
2025-07-01 05:45:13.535 elif tag == 'insert':
2025-07-01 05:45:13.540 btags += '+' * lb
2025-07-01 05:45:13.545 elif tag == 'equal':
2025-07-01 05:45:13.549 atags += ' ' * la
2025-07-01 05:45:13.555 btags += ' ' * lb
2025-07-01 05:45:13.561 else:
2025-07-01 05:45:13.567 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:13.573 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:13.579 else:
2025-07-01 05:45:13.587 # the synch pair is identical
2025-07-01 05:45:13.595 yield ' ' + aelt
2025-07-01 05:45:13.602
2025-07-01 05:45:13.608 # pump out diffs from after the synch point
2025-07-01 05:45:13.620 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:13.629
2025-07-01 05:45:13.638 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:13.647 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:13.654
2025-07-01 05:45:13.661 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:13.668 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:13.674 alo = 79, ahi = 1101
2025-07-01 05:45:13.680 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:13.685 blo = 79, bhi = 1101
2025-07-01 05:45:13.689
2025-07-01 05:45:13.694 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:13.702 g = []
2025-07-01 05:45:13.711 if alo < ahi:
2025-07-01 05:45:13.723 if blo < bhi:
2025-07-01 05:45:13.732 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:13.740 else:
2025-07-01 05:45:13.747 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:13.753 elif blo < bhi:
2025-07-01 05:45:13.767 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:13.777
2025-07-01 05:45:13.789 > yield from g
2025-07-01 05:45:13.800
2025-07-01 05:45:13.808 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:13.822 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:13.834
2025-07-01 05:45:13.842 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:13.851 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:13.858 alo = 79, ahi = 1101
2025-07-01 05:45:13.867 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:13.875 blo = 79, bhi = 1101
2025-07-01 05:45:13.886
2025-07-01 05:45:13.893 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:13.901 r"""
2025-07-01 05:45:13.909 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:13.914 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:13.922 synch point, and intraline difference marking is done on the
2025-07-01 05:45:13.930 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:13.936
2025-07-01 05:45:13.943 Example:
2025-07-01 05:45:13.950
2025-07-01 05:45:13.957 >>> d = Differ()
2025-07-01 05:45:13.964 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:13.971 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:13.978 >>> print(''.join(results), end="")
2025-07-01 05:45:13.985 - abcDefghiJkl
2025-07-01 05:45:14.000 + abcdefGhijkl
2025-07-01 05:45:14.015 """
2025-07-01 05:45:14.024
2025-07-01 05:45:14.036 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:14.045 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:14.051 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:14.057 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:14.063 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:14.069
2025-07-01 05:45:14.077 # search for the pair that matches best without being identical
2025-07-01 05:45:14.087 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:14.100 # on junk -- unless we have to)
2025-07-01 05:45:14.107 for j in range(blo, bhi):
2025-07-01 05:45:14.115 bj = b[j]
2025-07-01 05:45:14.123 cruncher.set_seq2(bj)
2025-07-01 05:45:14.131 for i in range(alo, ahi):
2025-07-01 05:45:14.138 ai = a[i]
2025-07-01 05:45:14.144 if ai == bj:
2025-07-01 05:45:14.151 if eqi is None:
2025-07-01 05:45:14.158 eqi, eqj = i, j
2025-07-01 05:45:14.164 continue
2025-07-01 05:45:14.170 cruncher.set_seq1(ai)
2025-07-01 05:45:14.178 # computing similarity is expensive, so use the quick
2025-07-01 05:45:14.187 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:14.195 # compares by a factor of 3.
2025-07-01 05:45:14.202 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:14.211 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:14.218 # of the computation is cached by cruncher
2025-07-01 05:45:14.227 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:14.239 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:14.246 cruncher.ratio() > best_ratio:
2025-07-01 05:45:14.253 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:14.260 if best_ratio < cutoff:
2025-07-01 05:45:14.266 # no non-identical "pretty close" pair
2025-07-01 05:45:14.273 if eqi is None:
2025-07-01 05:45:14.279 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:14.286 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:14.298 return
2025-07-01 05:45:14.307 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:14.313 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:14.319 else:
2025-07-01 05:45:14.325 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:14.330 eqi = None
2025-07-01 05:45:14.335
2025-07-01 05:45:14.340 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:14.346 # identical
2025-07-01 05:45:14.353
2025-07-01 05:45:14.362 # pump out diffs from before the synch point
2025-07-01 05:45:14.373 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:14.382
2025-07-01 05:45:14.389 # do intraline marking on the synch pair
2025-07-01 05:45:14.396 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:14.403 if eqi is None:
2025-07-01 05:45:14.411 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:14.418 atags = btags = ""
2025-07-01 05:45:14.424 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:14.429 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:14.434 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:14.442 if tag == 'replace':
2025-07-01 05:45:14.454 atags += '^' * la
2025-07-01 05:45:14.463 btags += '^' * lb
2025-07-01 05:45:14.474 elif tag == 'delete':
2025-07-01 05:45:14.486 atags += '-' * la
2025-07-01 05:45:14.498 elif tag == 'insert':
2025-07-01 05:45:14.510 btags += '+' * lb
2025-07-01 05:45:14.517 elif tag == 'equal':
2025-07-01 05:45:14.523 atags += ' ' * la
2025-07-01 05:45:14.529 btags += ' ' * lb
2025-07-01 05:45:14.536 else:
2025-07-01 05:45:14.541 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:14.547 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:14.556 else:
2025-07-01 05:45:14.568 # the synch pair is identical
2025-07-01 05:45:14.577 yield ' ' + aelt
2025-07-01 05:45:14.583
2025-07-01 05:45:14.590 # pump out diffs from after the synch point
2025-07-01 05:45:14.598 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:14.605
2025-07-01 05:45:14.612 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:14.618 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:14.627
2025-07-01 05:45:14.640 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:14.654 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:14.664 alo = 80, ahi = 1101
2025-07-01 05:45:14.674 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:14.685 blo = 80, bhi = 1101
2025-07-01 05:45:14.696
2025-07-01 05:45:14.707 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:14.715 g = []
2025-07-01 05:45:14.723 if alo < ahi:
2025-07-01 05:45:14.730 if blo < bhi:
2025-07-01 05:45:14.739 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:14.749 else:
2025-07-01 05:45:14.757 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:14.764 elif blo < bhi:
2025-07-01 05:45:14.773 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:14.783
2025-07-01 05:45:14.789 > yield from g
2025-07-01 05:45:14.794
2025-07-01 05:45:14.799 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:14.804 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:14.810
2025-07-01 05:45:14.815 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:14.823 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:14.835 alo = 80, ahi = 1101
2025-07-01 05:45:14.847 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:14.856 blo = 80, bhi = 1101
2025-07-01 05:45:14.864
2025-07-01 05:45:14.871 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:14.878 r"""
2025-07-01 05:45:14.884 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:14.890 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:14.896 synch point, and intraline difference marking is done on the
2025-07-01 05:45:14.902 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:14.907
2025-07-01 05:45:14.913 Example:
2025-07-01 05:45:14.918
2025-07-01 05:45:14.930 >>> d = Differ()
2025-07-01 05:45:14.939 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:14.951 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:14.961 >>> print(''.join(results), end="")
2025-07-01 05:45:14.968 - abcDefghiJkl
2025-07-01 05:45:14.989 + abcdefGhijkl
2025-07-01 05:45:15.004 """
2025-07-01 05:45:15.011
2025-07-01 05:45:15.018 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:15.024 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:15.029 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:15.035 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:15.040 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:15.046
2025-07-01 05:45:15.051 # search for the pair that matches best without being identical
2025-07-01 05:45:15.057 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:15.063 # on junk -- unless we have to)
2025-07-01 05:45:15.069 for j in range(blo, bhi):
2025-07-01 05:45:15.075 bj = b[j]
2025-07-01 05:45:15.081 cruncher.set_seq2(bj)
2025-07-01 05:45:15.087 for i in range(alo, ahi):
2025-07-01 05:45:15.092 ai = a[i]
2025-07-01 05:45:15.098 if ai == bj:
2025-07-01 05:45:15.104 if eqi is None:
2025-07-01 05:45:15.110 eqi, eqj = i, j
2025-07-01 05:45:15.116 continue
2025-07-01 05:45:15.122 cruncher.set_seq1(ai)
2025-07-01 05:45:15.128 # computing similarity is expensive, so use the quick
2025-07-01 05:45:15.134 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:15.140 # compares by a factor of 3.
2025-07-01 05:45:15.146 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:15.154 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:15.164 # of the computation is cached by cruncher
2025-07-01 05:45:15.171 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:15.178 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:15.184 cruncher.ratio() > best_ratio:
2025-07-01 05:45:15.190 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:15.197 if best_ratio < cutoff:
2025-07-01 05:45:15.203 # no non-identical "pretty close" pair
2025-07-01 05:45:15.210 if eqi is None:
2025-07-01 05:45:15.216 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:15.224 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:15.231 return
2025-07-01 05:45:15.240 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:15.248 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:15.256 else:
2025-07-01 05:45:15.267 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:15.279 eqi = None
2025-07-01 05:45:15.287
2025-07-01 05:45:15.295 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:15.301 # identical
2025-07-01 05:45:15.308
2025-07-01 05:45:15.314 # pump out diffs from before the synch point
2025-07-01 05:45:15.320 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:15.327
2025-07-01 05:45:15.334 # do intraline marking on the synch pair
2025-07-01 05:45:15.341 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:15.348 if eqi is None:
2025-07-01 05:45:15.354 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:15.361 atags = btags = ""
2025-07-01 05:45:15.369 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:15.377 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:15.388 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:15.397 if tag == 'replace':
2025-07-01 05:45:15.405 atags += '^' * la
2025-07-01 05:45:15.415 btags += '^' * lb
2025-07-01 05:45:15.422 elif tag == 'delete':
2025-07-01 05:45:15.430 atags += '-' * la
2025-07-01 05:45:15.441 elif tag == 'insert':
2025-07-01 05:45:15.449 btags += '+' * lb
2025-07-01 05:45:15.457 elif tag == 'equal':
2025-07-01 05:45:15.464 atags += ' ' * la
2025-07-01 05:45:15.477 btags += ' ' * lb
2025-07-01 05:45:15.485 else:
2025-07-01 05:45:15.493 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:15.501 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:15.508 else:
2025-07-01 05:45:15.515 # the synch pair is identical
2025-07-01 05:45:15.522 yield ' ' + aelt
2025-07-01 05:45:15.533
2025-07-01 05:45:15.541 # pump out diffs from after the synch point
2025-07-01 05:45:15.549 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:15.560
2025-07-01 05:45:15.571 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:15.584 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:15.596
2025-07-01 05:45:15.607 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:15.617 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:15.624 alo = 81, ahi = 1101
2025-07-01 05:45:15.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:15.640 blo = 81, bhi = 1101
2025-07-01 05:45:15.652
2025-07-01 05:45:15.664 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:15.674 g = []
2025-07-01 05:45:15.684 if alo < ahi:
2025-07-01 05:45:15.694 if blo < bhi:
2025-07-01 05:45:15.703 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:15.709 else:
2025-07-01 05:45:15.716 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:15.724 elif blo < bhi:
2025-07-01 05:45:15.731 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:15.738
2025-07-01 05:45:15.745 > yield from g
2025-07-01 05:45:15.752
2025-07-01 05:45:15.759 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:15.766 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:15.773
2025-07-01 05:45:15.780 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:15.789 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:15.796 alo = 81, ahi = 1101
2025-07-01 05:45:15.809 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:15.820 blo = 81, bhi = 1101
2025-07-01 05:45:15.827
2025-07-01 05:45:15.834 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:15.844 r"""
2025-07-01 05:45:15.854 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:15.863 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:15.871 synch point, and intraline difference marking is done on the
2025-07-01 05:45:15.880 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:15.890
2025-07-01 05:45:15.899 Example:
2025-07-01 05:45:15.907
2025-07-01 05:45:15.918 >>> d = Differ()
2025-07-01 05:45:15.929 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:15.940 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:15.949 >>> print(''.join(results), end="")
2025-07-01 05:45:15.957 - abcDefghiJkl
2025-07-01 05:45:15.968 + abcdefGhijkl
2025-07-01 05:45:15.977 """
2025-07-01 05:45:15.981
2025-07-01 05:45:15.986 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:15.990 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:15.996 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:16.004 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:16.010 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:16.019
2025-07-01 05:45:16.030 # search for the pair that matches best without being identical
2025-07-01 05:45:16.042 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:16.049 # on junk -- unless we have to)
2025-07-01 05:45:16.055 for j in range(blo, bhi):
2025-07-01 05:45:16.061 bj = b[j]
2025-07-01 05:45:16.070 cruncher.set_seq2(bj)
2025-07-01 05:45:16.079 for i in range(alo, ahi):
2025-07-01 05:45:16.088 ai = a[i]
2025-07-01 05:45:16.097 if ai == bj:
2025-07-01 05:45:16.107 if eqi is None:
2025-07-01 05:45:16.120 eqi, eqj = i, j
2025-07-01 05:45:16.130 continue
2025-07-01 05:45:16.139 cruncher.set_seq1(ai)
2025-07-01 05:45:16.146 # computing similarity is expensive, so use the quick
2025-07-01 05:45:16.157 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:16.169 # compares by a factor of 3.
2025-07-01 05:45:16.181 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:16.193 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:16.202 # of the computation is cached by cruncher
2025-07-01 05:45:16.208 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:16.214 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:16.219 cruncher.ratio() > best_ratio:
2025-07-01 05:45:16.225 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:16.231 if best_ratio < cutoff:
2025-07-01 05:45:16.238 # no non-identical "pretty close" pair
2025-07-01 05:45:16.244 if eqi is None:
2025-07-01 05:45:16.252 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:16.259 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:16.268 return
2025-07-01 05:45:16.279 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:16.287 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:16.299 else:
2025-07-01 05:45:16.307 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:16.314 eqi = None
2025-07-01 05:45:16.321
2025-07-01 05:45:16.328 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:16.338 # identical
2025-07-01 05:45:16.349
2025-07-01 05:45:16.358 # pump out diffs from before the synch point
2025-07-01 05:45:16.366 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:16.373
2025-07-01 05:45:16.386 # do intraline marking on the synch pair
2025-07-01 05:45:16.394 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:16.405 if eqi is None:
2025-07-01 05:45:16.412 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:16.425 atags = btags = ""
2025-07-01 05:45:16.432 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:16.441 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:16.450 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:16.459 if tag == 'replace':
2025-07-01 05:45:16.466 atags += '^' * la
2025-07-01 05:45:16.473 btags += '^' * lb
2025-07-01 05:45:16.480 elif tag == 'delete':
2025-07-01 05:45:16.487 atags += '-' * la
2025-07-01 05:45:16.495 elif tag == 'insert':
2025-07-01 05:45:16.502 btags += '+' * lb
2025-07-01 05:45:16.510 elif tag == 'equal':
2025-07-01 05:45:16.522 atags += ' ' * la
2025-07-01 05:45:16.530 btags += ' ' * lb
2025-07-01 05:45:16.540 else:
2025-07-01 05:45:16.549 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:16.558 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:16.566 else:
2025-07-01 05:45:16.573 # the synch pair is identical
2025-07-01 05:45:16.580 yield ' ' + aelt
2025-07-01 05:45:16.585
2025-07-01 05:45:16.591 # pump out diffs from after the synch point
2025-07-01 05:45:16.597 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:16.602
2025-07-01 05:45:16.608 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:16.613 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:16.619
2025-07-01 05:45:16.624 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:16.631 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:16.638 alo = 82, ahi = 1101
2025-07-01 05:45:16.651 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:16.659 blo = 82, bhi = 1101
2025-07-01 05:45:16.666
2025-07-01 05:45:16.672 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:16.679 g = []
2025-07-01 05:45:16.690 if alo < ahi:
2025-07-01 05:45:16.696 if blo < bhi:
2025-07-01 05:45:16.702 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:16.714 else:
2025-07-01 05:45:16.723 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:16.735 elif blo < bhi:
2025-07-01 05:45:16.747 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:16.760
2025-07-01 05:45:16.771 > yield from g
2025-07-01 05:45:16.779
2025-07-01 05:45:16.787 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:16.797 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:16.809
2025-07-01 05:45:16.821 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:16.836 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:16.845 alo = 82, ahi = 1101
2025-07-01 05:45:16.855 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:16.862 blo = 82, bhi = 1101
2025-07-01 05:45:16.870
2025-07-01 05:45:16.879 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:16.890 r"""
2025-07-01 05:45:16.900 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:16.909 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:16.917 synch point, and intraline difference marking is done on the
2025-07-01 05:45:16.923 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:16.929
2025-07-01 05:45:16.935 Example:
2025-07-01 05:45:16.940
2025-07-01 05:45:16.946 >>> d = Differ()
2025-07-01 05:45:16.951 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:16.957 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:16.973 >>> print(''.join(results), end="")
2025-07-01 05:45:16.983 - abcDefghiJkl
2025-07-01 05:45:17.000 + abcdefGhijkl
2025-07-01 05:45:17.016 """
2025-07-01 05:45:17.023
2025-07-01 05:45:17.030 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:17.037 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:17.049 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:17.059 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:17.068 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:17.074
2025-07-01 05:45:17.082 # search for the pair that matches best without being identical
2025-07-01 05:45:17.089 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:17.098 # on junk -- unless we have to)
2025-07-01 05:45:17.107 for j in range(blo, bhi):
2025-07-01 05:45:17.116 bj = b[j]
2025-07-01 05:45:17.124 cruncher.set_seq2(bj)
2025-07-01 05:45:17.134 for i in range(alo, ahi):
2025-07-01 05:45:17.144 ai = a[i]
2025-07-01 05:45:17.152 if ai == bj:
2025-07-01 05:45:17.159 if eqi is None:
2025-07-01 05:45:17.164 eqi, eqj = i, j
2025-07-01 05:45:17.175 continue
2025-07-01 05:45:17.186 cruncher.set_seq1(ai)
2025-07-01 05:45:17.197 # computing similarity is expensive, so use the quick
2025-07-01 05:45:17.204 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:17.211 # compares by a factor of 3.
2025-07-01 05:45:17.219 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:17.228 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:17.243 # of the computation is cached by cruncher
2025-07-01 05:45:17.255 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:17.265 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:17.274 cruncher.ratio() > best_ratio:
2025-07-01 05:45:17.283 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:17.294 if best_ratio < cutoff:
2025-07-01 05:45:17.310 # no non-identical "pretty close" pair
2025-07-01 05:45:17.323 if eqi is None:
2025-07-01 05:45:17.333 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:17.344 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:17.359 return
2025-07-01 05:45:17.375 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:17.386 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:17.396 else:
2025-07-01 05:45:17.405 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:17.411 eqi = None
2025-07-01 05:45:17.417
2025-07-01 05:45:17.426 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:17.434 # identical
2025-07-01 05:45:17.446
2025-07-01 05:45:17.456 # pump out diffs from before the synch point
2025-07-01 05:45:17.464 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:17.472
2025-07-01 05:45:17.479 # do intraline marking on the synch pair
2025-07-01 05:45:17.487 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:17.498 if eqi is None:
2025-07-01 05:45:17.510 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:17.520 atags = btags = ""
2025-07-01 05:45:17.534 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:17.545 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:17.556 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:17.568 if tag == 'replace':
2025-07-01 05:45:17.585 atags += '^' * la
2025-07-01 05:45:17.595 btags += '^' * lb
2025-07-01 05:45:17.603 elif tag == 'delete':
2025-07-01 05:45:17.613 atags += '-' * la
2025-07-01 05:45:17.627 elif tag == 'insert':
2025-07-01 05:45:17.639 btags += '+' * lb
2025-07-01 05:45:17.649 elif tag == 'equal':
2025-07-01 05:45:17.658 atags += ' ' * la
2025-07-01 05:45:17.668 btags += ' ' * lb
2025-07-01 05:45:17.680 else:
2025-07-01 05:45:17.692 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:17.703 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:17.712 else:
2025-07-01 05:45:17.721 # the synch pair is identical
2025-07-01 05:45:17.727 yield ' ' + aelt
2025-07-01 05:45:17.737
2025-07-01 05:45:17.747 # pump out diffs from after the synch point
2025-07-01 05:45:17.759 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:17.768
2025-07-01 05:45:17.782 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:17.794 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:17.805
2025-07-01 05:45:17.814 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:17.828 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:17.839 alo = 83, ahi = 1101
2025-07-01 05:45:17.850 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:17.862 blo = 83, bhi = 1101
2025-07-01 05:45:17.873
2025-07-01 05:45:17.881 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:17.888 g = []
2025-07-01 05:45:17.894 if alo < ahi:
2025-07-01 05:45:17.900 if blo < bhi:
2025-07-01 05:45:17.906 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:17.911 else:
2025-07-01 05:45:17.919 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:17.930 elif blo < bhi:
2025-07-01 05:45:17.941 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:17.950
2025-07-01 05:45:17.961 > yield from g
2025-07-01 05:45:17.971
2025-07-01 05:45:17.980 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:17.990 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:18.000
2025-07-01 05:45:18.008 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:18.021 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:18.029 alo = 83, ahi = 1101
2025-07-01 05:45:18.038 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:18.044 blo = 83, bhi = 1101
2025-07-01 05:45:18.050
2025-07-01 05:45:18.055 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:18.062 r"""
2025-07-01 05:45:18.071 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:18.078 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:18.085 synch point, and intraline difference marking is done on the
2025-07-01 05:45:18.091 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:18.099
2025-07-01 05:45:18.110 Example:
2025-07-01 05:45:18.122
2025-07-01 05:45:18.131 >>> d = Differ()
2025-07-01 05:45:18.140 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:18.147 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:18.154 >>> print(''.join(results), end="")
2025-07-01 05:45:18.160 - abcDefghiJkl
2025-07-01 05:45:18.181 + abcdefGhijkl
2025-07-01 05:45:18.205 """
2025-07-01 05:45:18.218
2025-07-01 05:45:18.228 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:18.236 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:18.244 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:18.251 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:18.258 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:18.267
2025-07-01 05:45:18.276 # search for the pair that matches best without being identical
2025-07-01 05:45:18.283 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:18.294 # on junk -- unless we have to)
2025-07-01 05:45:18.302 for j in range(blo, bhi):
2025-07-01 05:45:18.308 bj = b[j]
2025-07-01 05:45:18.315 cruncher.set_seq2(bj)
2025-07-01 05:45:18.322 for i in range(alo, ahi):
2025-07-01 05:45:18.333 ai = a[i]
2025-07-01 05:45:18.343 if ai == bj:
2025-07-01 05:45:18.353 if eqi is None:
2025-07-01 05:45:18.360 eqi, eqj = i, j
2025-07-01 05:45:18.366 continue
2025-07-01 05:45:18.373 cruncher.set_seq1(ai)
2025-07-01 05:45:18.384 # computing similarity is expensive, so use the quick
2025-07-01 05:45:18.395 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:18.406 # compares by a factor of 3.
2025-07-01 05:45:18.413 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:18.419 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:18.426 # of the computation is cached by cruncher
2025-07-01 05:45:18.435 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:18.442 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:18.453 cruncher.ratio() > best_ratio:
2025-07-01 05:45:18.461 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:18.467 if best_ratio < cutoff:
2025-07-01 05:45:18.473 # no non-identical "pretty close" pair
2025-07-01 05:45:18.477 if eqi is None:
2025-07-01 05:45:18.482 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:18.487 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:18.493 return
2025-07-01 05:45:18.500 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:18.507 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:18.514 else:
2025-07-01 05:45:18.525 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:18.533 eqi = None
2025-07-01 05:45:18.539
2025-07-01 05:45:18.546 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:18.555 # identical
2025-07-01 05:45:18.566
2025-07-01 05:45:18.574 # pump out diffs from before the synch point
2025-07-01 05:45:18.580 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:18.586
2025-07-01 05:45:18.595 # do intraline marking on the synch pair
2025-07-01 05:45:18.603 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:18.610 if eqi is None:
2025-07-01 05:45:18.617 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:18.623 atags = btags = ""
2025-07-01 05:45:18.630 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:18.636 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:18.641 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:18.646 if tag == 'replace':
2025-07-01 05:45:18.650 atags += '^' * la
2025-07-01 05:45:18.657 btags += '^' * lb
2025-07-01 05:45:18.663 elif tag == 'delete':
2025-07-01 05:45:18.669 atags += '-' * la
2025-07-01 05:45:18.675 elif tag == 'insert':
2025-07-01 05:45:18.683 btags += '+' * lb
2025-07-01 05:45:18.691 elif tag == 'equal':
2025-07-01 05:45:18.698 atags += ' ' * la
2025-07-01 05:45:18.704 btags += ' ' * lb
2025-07-01 05:45:18.711 else:
2025-07-01 05:45:18.716 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:18.722 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:18.727 else:
2025-07-01 05:45:18.735 # the synch pair is identical
2025-07-01 05:45:18.746 yield ' ' + aelt
2025-07-01 05:45:18.756
2025-07-01 05:45:18.764 # pump out diffs from after the synch point
2025-07-01 05:45:18.774 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:18.788
2025-07-01 05:45:18.800 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:18.809 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:18.816
2025-07-01 05:45:18.823 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:18.833 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:18.840 alo = 84, ahi = 1101
2025-07-01 05:45:18.854 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:18.866 blo = 84, bhi = 1101
2025-07-01 05:45:18.876
2025-07-01 05:45:18.885 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:18.891 g = []
2025-07-01 05:45:18.899 if alo < ahi:
2025-07-01 05:45:18.907 if blo < bhi:
2025-07-01 05:45:18.920 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:18.928 else:
2025-07-01 05:45:18.934 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:18.940 elif blo < bhi:
2025-07-01 05:45:18.945 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:18.951
2025-07-01 05:45:18.957 > yield from g
2025-07-01 05:45:18.963
2025-07-01 05:45:18.969 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:18.977 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:18.983
2025-07-01 05:45:18.989 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:18.995 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:19.000 alo = 84, ahi = 1101
2025-07-01 05:45:19.007 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:19.014 blo = 84, bhi = 1101
2025-07-01 05:45:19.019
2025-07-01 05:45:19.026 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:19.033 r"""
2025-07-01 05:45:19.042 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:19.048 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:19.054 synch point, and intraline difference marking is done on the
2025-07-01 05:45:19.059 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:19.063
2025-07-01 05:45:19.068 Example:
2025-07-01 05:45:19.073
2025-07-01 05:45:19.078 >>> d = Differ()
2025-07-01 05:45:19.083 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:19.088 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:19.093 >>> print(''.join(results), end="")
2025-07-01 05:45:19.098 - abcDefghiJkl
2025-07-01 05:45:19.109 + abcdefGhijkl
2025-07-01 05:45:19.122 """
2025-07-01 05:45:19.130
2025-07-01 05:45:19.141 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:19.154 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:19.163 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:19.171 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:19.179 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:19.186
2025-07-01 05:45:19.195 # search for the pair that matches best without being identical
2025-07-01 05:45:19.202 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:19.215 # on junk -- unless we have to)
2025-07-01 05:45:19.226 for j in range(blo, bhi):
2025-07-01 05:45:19.234 bj = b[j]
2025-07-01 05:45:19.241 cruncher.set_seq2(bj)
2025-07-01 05:45:19.248 for i in range(alo, ahi):
2025-07-01 05:45:19.255 ai = a[i]
2025-07-01 05:45:19.264 if ai == bj:
2025-07-01 05:45:19.273 if eqi is None:
2025-07-01 05:45:19.281 eqi, eqj = i, j
2025-07-01 05:45:19.289 continue
2025-07-01 05:45:19.295 cruncher.set_seq1(ai)
2025-07-01 05:45:19.301 # computing similarity is expensive, so use the quick
2025-07-01 05:45:19.310 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:19.319 # compares by a factor of 3.
2025-07-01 05:45:19.326 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:19.336 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:19.347 # of the computation is cached by cruncher
2025-07-01 05:45:19.360 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:19.371 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:19.382 cruncher.ratio() > best_ratio:
2025-07-01 05:45:19.390 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:19.397 if best_ratio < cutoff:
2025-07-01 05:45:19.405 # no non-identical "pretty close" pair
2025-07-01 05:45:19.411 if eqi is None:
2025-07-01 05:45:19.421 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:19.432 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:19.445 return
2025-07-01 05:45:19.454 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:19.462 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:19.470 else:
2025-07-01 05:45:19.477 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:19.483 eqi = None
2025-07-01 05:45:19.489
2025-07-01 05:45:19.495 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:19.501 # identical
2025-07-01 05:45:19.507
2025-07-01 05:45:19.514 # pump out diffs from before the synch point
2025-07-01 05:45:19.524 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:19.532
2025-07-01 05:45:19.539 # do intraline marking on the synch pair
2025-07-01 05:45:19.545 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:19.551 if eqi is None:
2025-07-01 05:45:19.557 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:19.565 atags = btags = ""
2025-07-01 05:45:19.572 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:19.580 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:19.587 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:19.594 if tag == 'replace':
2025-07-01 05:45:19.601 atags += '^' * la
2025-07-01 05:45:19.608 btags += '^' * lb
2025-07-01 05:45:19.615 elif tag == 'delete':
2025-07-01 05:45:19.623 atags += '-' * la
2025-07-01 05:45:19.635 elif tag == 'insert':
2025-07-01 05:45:19.643 btags += '+' * lb
2025-07-01 05:45:19.649 elif tag == 'equal':
2025-07-01 05:45:19.655 atags += ' ' * la
2025-07-01 05:45:19.660 btags += ' ' * lb
2025-07-01 05:45:19.665 else:
2025-07-01 05:45:19.671 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:19.678 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:19.684 else:
2025-07-01 05:45:19.690 # the synch pair is identical
2025-07-01 05:45:19.699 yield ' ' + aelt
2025-07-01 05:45:19.706
2025-07-01 05:45:19.714 # pump out diffs from after the synch point
2025-07-01 05:45:19.725 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:19.735
2025-07-01 05:45:19.742 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:19.748 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:19.756
2025-07-01 05:45:19.762 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:19.770 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:19.775 alo = 85, ahi = 1101
2025-07-01 05:45:19.788 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:19.797 blo = 85, bhi = 1101
2025-07-01 05:45:19.805
2025-07-01 05:45:19.812 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:19.818 g = []
2025-07-01 05:45:19.829 if alo < ahi:
2025-07-01 05:45:19.837 if blo < bhi:
2025-07-01 05:45:19.844 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:19.853 else:
2025-07-01 05:45:19.866 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:19.880 elif blo < bhi:
2025-07-01 05:45:19.892 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:19.900
2025-07-01 05:45:19.906 > yield from g
2025-07-01 05:45:19.911
2025-07-01 05:45:19.916 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:19.923 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:19.931
2025-07-01 05:45:19.942 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:19.951 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:19.958 alo = 85, ahi = 1101
2025-07-01 05:45:19.965 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:19.971 blo = 85, bhi = 1101
2025-07-01 05:45:19.978
2025-07-01 05:45:19.988 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:19.997 r"""
2025-07-01 05:45:20.005 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:20.013 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:20.020 synch point, and intraline difference marking is done on the
2025-07-01 05:45:20.026 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:20.032
2025-07-01 05:45:20.040 Example:
2025-07-01 05:45:20.046
2025-07-01 05:45:20.053 >>> d = Differ()
2025-07-01 05:45:20.059 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:20.064 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:20.069 >>> print(''.join(results), end="")
2025-07-01 05:45:20.075 - abcDefghiJkl
2025-07-01 05:45:20.087 + abcdefGhijkl
2025-07-01 05:45:20.110 """
2025-07-01 05:45:20.119
2025-07-01 05:45:20.127 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:20.138 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:20.149 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:20.161 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:20.172 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:20.184
2025-07-01 05:45:20.197 # search for the pair that matches best without being identical
2025-07-01 05:45:20.207 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:20.216 # on junk -- unless we have to)
2025-07-01 05:45:20.223 for j in range(blo, bhi):
2025-07-01 05:45:20.231 bj = b[j]
2025-07-01 05:45:20.241 cruncher.set_seq2(bj)
2025-07-01 05:45:20.250 for i in range(alo, ahi):
2025-07-01 05:45:20.260 ai = a[i]
2025-07-01 05:45:20.270 if ai == bj:
2025-07-01 05:45:20.280 if eqi is None:
2025-07-01 05:45:20.287 eqi, eqj = i, j
2025-07-01 05:45:20.294 continue
2025-07-01 05:45:20.301 cruncher.set_seq1(ai)
2025-07-01 05:45:20.307 # computing similarity is expensive, so use the quick
2025-07-01 05:45:20.315 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:20.323 # compares by a factor of 3.
2025-07-01 05:45:20.337 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:20.347 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:20.354 # of the computation is cached by cruncher
2025-07-01 05:45:20.359 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:20.364 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:20.370 cruncher.ratio() > best_ratio:
2025-07-01 05:45:20.375 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:20.381 if best_ratio < cutoff:
2025-07-01 05:45:20.387 # no non-identical "pretty close" pair
2025-07-01 05:45:20.397 if eqi is None:
2025-07-01 05:45:20.406 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:20.419 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:20.433 return
2025-07-01 05:45:20.444 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:20.452 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:20.460 else:
2025-07-01 05:45:20.466 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:20.472 eqi = None
2025-07-01 05:45:20.478
2025-07-01 05:45:20.484 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:20.491 # identical
2025-07-01 05:45:20.498
2025-07-01 05:45:20.505 # pump out diffs from before the synch point
2025-07-01 05:45:20.512 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:20.519
2025-07-01 05:45:20.528 # do intraline marking on the synch pair
2025-07-01 05:45:20.541 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:20.551 if eqi is None:
2025-07-01 05:45:20.559 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:20.567 atags = btags = ""
2025-07-01 05:45:20.574 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:20.585 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:20.595 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:20.603 if tag == 'replace':
2025-07-01 05:45:20.610 atags += '^' * la
2025-07-01 05:45:20.616 btags += '^' * lb
2025-07-01 05:45:20.622 elif tag == 'delete':
2025-07-01 05:45:20.628 atags += '-' * la
2025-07-01 05:45:20.634 elif tag == 'insert':
2025-07-01 05:45:20.645 btags += '+' * lb
2025-07-01 05:45:20.654 elif tag == 'equal':
2025-07-01 05:45:20.661 atags += ' ' * la
2025-07-01 05:45:20.668 btags += ' ' * lb
2025-07-01 05:45:20.674 else:
2025-07-01 05:45:20.686 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:20.696 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:20.705 else:
2025-07-01 05:45:20.717 # the synch pair is identical
2025-07-01 05:45:20.726 yield ' ' + aelt
2025-07-01 05:45:20.734
2025-07-01 05:45:20.742 # pump out diffs from after the synch point
2025-07-01 05:45:20.751 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:20.760
2025-07-01 05:45:20.767 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:20.774 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:20.780
2025-07-01 05:45:20.786 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:20.797 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:20.807 alo = 86, ahi = 1101
2025-07-01 05:45:20.816 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:20.822 blo = 86, bhi = 1101
2025-07-01 05:45:20.828
2025-07-01 05:45:20.834 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:20.841 g = []
2025-07-01 05:45:20.849 if alo < ahi:
2025-07-01 05:45:20.860 if blo < bhi:
2025-07-01 05:45:20.868 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:20.875 else:
2025-07-01 05:45:20.881 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:20.888 elif blo < bhi:
2025-07-01 05:45:20.894 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:20.899
2025-07-01 05:45:20.907 > yield from g
2025-07-01 05:45:20.917
2025-07-01 05:45:20.925 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:20.933 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:20.940
2025-07-01 05:45:20.954 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:20.963 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:20.971 alo = 86, ahi = 1101
2025-07-01 05:45:20.983 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:20.994 blo = 86, bhi = 1101
2025-07-01 05:45:21.005
2025-07-01 05:45:21.015 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:21.024 r"""
2025-07-01 05:45:21.031 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:21.037 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:21.048 synch point, and intraline difference marking is done on the
2025-07-01 05:45:21.059 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:21.068
2025-07-01 05:45:21.075 Example:
2025-07-01 05:45:21.081
2025-07-01 05:45:21.087 >>> d = Differ()
2025-07-01 05:45:21.093 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:21.099 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:21.105 >>> print(''.join(results), end="")
2025-07-01 05:45:21.117 - abcDefghiJkl
2025-07-01 05:45:21.132 + abcdefGhijkl
2025-07-01 05:45:21.149 """
2025-07-01 05:45:21.160
2025-07-01 05:45:21.172 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:21.184 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:21.193 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:21.205 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:21.217 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:21.228
2025-07-01 05:45:21.239 # search for the pair that matches best without being identical
2025-07-01 05:45:21.249 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:21.263 # on junk -- unless we have to)
2025-07-01 05:45:21.274 for j in range(blo, bhi):
2025-07-01 05:45:21.283 bj = b[j]
2025-07-01 05:45:21.291 cruncher.set_seq2(bj)
2025-07-01 05:45:21.299 for i in range(alo, ahi):
2025-07-01 05:45:21.310 ai = a[i]
2025-07-01 05:45:21.319 if ai == bj:
2025-07-01 05:45:21.328 if eqi is None:
2025-07-01 05:45:21.335 eqi, eqj = i, j
2025-07-01 05:45:21.342 continue
2025-07-01 05:45:21.349 cruncher.set_seq1(ai)
2025-07-01 05:45:21.355 # computing similarity is expensive, so use the quick
2025-07-01 05:45:21.361 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:21.370 # compares by a factor of 3.
2025-07-01 05:45:21.381 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:21.389 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:21.395 # of the computation is cached by cruncher
2025-07-01 05:45:21.402 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:21.408 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:21.414 cruncher.ratio() > best_ratio:
2025-07-01 05:45:21.420 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:21.430 if best_ratio < cutoff:
2025-07-01 05:45:21.440 # no non-identical "pretty close" pair
2025-07-01 05:45:21.446 if eqi is None:
2025-07-01 05:45:21.452 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:21.458 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:21.468 return
2025-07-01 05:45:21.477 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:21.484 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:21.490 else:
2025-07-01 05:45:21.500 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:21.509 eqi = None
2025-07-01 05:45:21.516
2025-07-01 05:45:21.523 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:21.531 # identical
2025-07-01 05:45:21.541
2025-07-01 05:45:21.554 # pump out diffs from before the synch point
2025-07-01 05:45:21.564 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:21.574
2025-07-01 05:45:21.581 # do intraline marking on the synch pair
2025-07-01 05:45:21.587 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:21.596 if eqi is None:
2025-07-01 05:45:21.607 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:21.615 atags = btags = ""
2025-07-01 05:45:21.622 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:21.633 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:21.644 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:21.652 if tag == 'replace':
2025-07-01 05:45:21.659 atags += '^' * la
2025-07-01 05:45:21.666 btags += '^' * lb
2025-07-01 05:45:21.679 elif tag == 'delete':
2025-07-01 05:45:21.691 atags += '-' * la
2025-07-01 05:45:21.698 elif tag == 'insert':
2025-07-01 05:45:21.704 btags += '+' * lb
2025-07-01 05:45:21.711 elif tag == 'equal':
2025-07-01 05:45:21.719 atags += ' ' * la
2025-07-01 05:45:21.725 btags += ' ' * lb
2025-07-01 05:45:21.732 else:
2025-07-01 05:45:21.739 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:21.747 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:21.758 else:
2025-07-01 05:45:21.771 # the synch pair is identical
2025-07-01 05:45:21.781 yield ' ' + aelt
2025-07-01 05:45:21.794
2025-07-01 05:45:21.803 # pump out diffs from after the synch point
2025-07-01 05:45:21.811 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:21.826
2025-07-01 05:45:21.839 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:21.850 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:21.861
2025-07-01 05:45:21.872 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:21.886 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:21.900 alo = 87, ahi = 1101
2025-07-01 05:45:21.916 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:21.928 blo = 87, bhi = 1101
2025-07-01 05:45:21.935
2025-07-01 05:45:21.943 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:21.949 g = []
2025-07-01 05:45:21.958 if alo < ahi:
2025-07-01 05:45:21.963 if blo < bhi:
2025-07-01 05:45:21.977 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:21.988 else:
2025-07-01 05:45:22.002 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:22.014 elif blo < bhi:
2025-07-01 05:45:22.023 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:22.031
2025-07-01 05:45:22.038 > yield from g
2025-07-01 05:45:22.045
2025-07-01 05:45:22.054 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:22.064 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:22.073
2025-07-01 05:45:22.080 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:22.087 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:22.098 alo = 87, ahi = 1101
2025-07-01 05:45:22.109 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:22.116 blo = 87, bhi = 1101
2025-07-01 05:45:22.123
2025-07-01 05:45:22.129 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:22.134 r"""
2025-07-01 05:45:22.141 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:22.148 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:22.155 synch point, and intraline difference marking is done on the
2025-07-01 05:45:22.161 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:22.168
2025-07-01 05:45:22.175 Example:
2025-07-01 05:45:22.184
2025-07-01 05:45:22.195 >>> d = Differ()
2025-07-01 05:45:22.204 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:22.212 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:22.218 >>> print(''.join(results), end="")
2025-07-01 05:45:22.224 - abcDefghiJkl
2025-07-01 05:45:22.235 + abcdefGhijkl
2025-07-01 05:45:22.248 """
2025-07-01 05:45:22.254
2025-07-01 05:45:22.260 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:22.266 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:22.272 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:22.282 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:22.294 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:22.303
2025-07-01 05:45:22.309 # search for the pair that matches best without being identical
2025-07-01 05:45:22.317 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:22.324 # on junk -- unless we have to)
2025-07-01 05:45:22.330 for j in range(blo, bhi):
2025-07-01 05:45:22.335 bj = b[j]
2025-07-01 05:45:22.340 cruncher.set_seq2(bj)
2025-07-01 05:45:22.346 for i in range(alo, ahi):
2025-07-01 05:45:22.352 ai = a[i]
2025-07-01 05:45:22.359 if ai == bj:
2025-07-01 05:45:22.366 if eqi is None:
2025-07-01 05:45:22.374 eqi, eqj = i, j
2025-07-01 05:45:22.383 continue
2025-07-01 05:45:22.390 cruncher.set_seq1(ai)
2025-07-01 05:45:22.396 # computing similarity is expensive, so use the quick
2025-07-01 05:45:22.402 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:22.408 # compares by a factor of 3.
2025-07-01 05:45:22.414 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:22.420 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:22.426 # of the computation is cached by cruncher
2025-07-01 05:45:22.435 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:22.444 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:22.452 cruncher.ratio() > best_ratio:
2025-07-01 05:45:22.457 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:22.462 if best_ratio < cutoff:
2025-07-01 05:45:22.470 # no non-identical "pretty close" pair
2025-07-01 05:45:22.479 if eqi is None:
2025-07-01 05:45:22.486 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:22.493 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:22.498 return
2025-07-01 05:45:22.503 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:22.508 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:22.513 else:
2025-07-01 05:45:22.518 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:22.523 eqi = None
2025-07-01 05:45:22.529
2025-07-01 05:45:22.535 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:22.541 # identical
2025-07-01 05:45:22.547
2025-07-01 05:45:22.554 # pump out diffs from before the synch point
2025-07-01 05:45:22.562 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:22.568
2025-07-01 05:45:22.575 # do intraline marking on the synch pair
2025-07-01 05:45:22.583 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:22.595 if eqi is None:
2025-07-01 05:45:22.602 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:22.608 atags = btags = ""
2025-07-01 05:45:22.614 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:22.619 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:22.623 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:22.628 if tag == 'replace':
2025-07-01 05:45:22.632 atags += '^' * la
2025-07-01 05:45:22.637 btags += '^' * lb
2025-07-01 05:45:22.641 elif tag == 'delete':
2025-07-01 05:45:22.646 atags += '-' * la
2025-07-01 05:45:22.651 elif tag == 'insert':
2025-07-01 05:45:22.658 btags += '+' * lb
2025-07-01 05:45:22.667 elif tag == 'equal':
2025-07-01 05:45:22.673 atags += ' ' * la
2025-07-01 05:45:22.680 btags += ' ' * lb
2025-07-01 05:45:22.687 else:
2025-07-01 05:45:22.694 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:22.703 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:22.714 else:
2025-07-01 05:45:22.723 # the synch pair is identical
2025-07-01 05:45:22.732 yield ' ' + aelt
2025-07-01 05:45:22.742
2025-07-01 05:45:22.751 # pump out diffs from after the synch point
2025-07-01 05:45:22.757 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:22.763
2025-07-01 05:45:22.769 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:22.776 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:22.782
2025-07-01 05:45:22.792 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:22.802 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:22.810 alo = 88, ahi = 1101
2025-07-01 05:45:22.817 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:22.824 blo = 88, bhi = 1101
2025-07-01 05:45:22.831
2025-07-01 05:45:22.838 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:22.848 g = []
2025-07-01 05:45:22.857 if alo < ahi:
2025-07-01 05:45:22.864 if blo < bhi:
2025-07-01 05:45:22.871 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:22.876 else:
2025-07-01 05:45:22.881 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:22.886 elif blo < bhi:
2025-07-01 05:45:22.892 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:22.905
2025-07-01 05:45:22.915 > yield from g
2025-07-01 05:45:22.922
2025-07-01 05:45:22.928 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:22.933 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:22.938
2025-07-01 05:45:22.943 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:22.948 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:22.953 alo = 88, ahi = 1101
2025-07-01 05:45:22.958 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:22.963 blo = 88, bhi = 1101
2025-07-01 05:45:22.967
2025-07-01 05:45:22.972 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:22.977 r"""
2025-07-01 05:45:22.983 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:22.991 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:22.997 synch point, and intraline difference marking is done on the
2025-07-01 05:45:23.004 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:23.011
2025-07-01 05:45:23.017 Example:
2025-07-01 05:45:23.023
2025-07-01 05:45:23.029 >>> d = Differ()
2025-07-01 05:45:23.035 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:23.041 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:23.047 >>> print(''.join(results), end="")
2025-07-01 05:45:23.053 - abcDefghiJkl
2025-07-01 05:45:23.064 + abcdefGhijkl
2025-07-01 05:45:23.076 """
2025-07-01 05:45:23.082
2025-07-01 05:45:23.088 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:23.095 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:23.101 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:23.108 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:23.116 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:23.128
2025-07-01 05:45:23.141 # search for the pair that matches best without being identical
2025-07-01 05:45:23.151 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:23.163 # on junk -- unless we have to)
2025-07-01 05:45:23.171 for j in range(blo, bhi):
2025-07-01 05:45:23.178 bj = b[j]
2025-07-01 05:45:23.184 cruncher.set_seq2(bj)
2025-07-01 05:45:23.190 for i in range(alo, ahi):
2025-07-01 05:45:23.198 ai = a[i]
2025-07-01 05:45:23.207 if ai == bj:
2025-07-01 05:45:23.215 if eqi is None:
2025-07-01 05:45:23.223 eqi, eqj = i, j
2025-07-01 05:45:23.232 continue
2025-07-01 05:45:23.238 cruncher.set_seq1(ai)
2025-07-01 05:45:23.246 # computing similarity is expensive, so use the quick
2025-07-01 05:45:23.253 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:23.260 # compares by a factor of 3.
2025-07-01 05:45:23.267 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:23.275 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:23.282 # of the computation is cached by cruncher
2025-07-01 05:45:23.290 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:23.299 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:23.309 cruncher.ratio() > best_ratio:
2025-07-01 05:45:23.318 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:23.324 if best_ratio < cutoff:
2025-07-01 05:45:23.329 # no non-identical "pretty close" pair
2025-07-01 05:45:23.335 if eqi is None:
2025-07-01 05:45:23.341 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:23.349 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:23.355 return
2025-07-01 05:45:23.362 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:23.373 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:23.382 else:
2025-07-01 05:45:23.388 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:23.393 eqi = None
2025-07-01 05:45:23.399
2025-07-01 05:45:23.408 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:23.420 # identical
2025-07-01 05:45:23.429
2025-07-01 05:45:23.435 # pump out diffs from before the synch point
2025-07-01 05:45:23.442 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:23.449
2025-07-01 05:45:23.456 # do intraline marking on the synch pair
2025-07-01 05:45:23.463 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:23.470 if eqi is None:
2025-07-01 05:45:23.481 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:23.493 atags = btags = ""
2025-07-01 05:45:23.502 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:23.509 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:23.516 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:23.522 if tag == 'replace':
2025-07-01 05:45:23.529 atags += '^' * la
2025-07-01 05:45:23.540 btags += '^' * lb
2025-07-01 05:45:23.553 elif tag == 'delete':
2025-07-01 05:45:23.563 atags += '-' * la
2025-07-01 05:45:23.570 elif tag == 'insert':
2025-07-01 05:45:23.576 btags += '+' * lb
2025-07-01 05:45:23.588 elif tag == 'equal':
2025-07-01 05:45:23.596 atags += ' ' * la
2025-07-01 05:45:23.603 btags += ' ' * lb
2025-07-01 05:45:23.610 else:
2025-07-01 05:45:23.617 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:23.624 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:23.631 else:
2025-07-01 05:45:23.640 # the synch pair is identical
2025-07-01 05:45:23.650 yield ' ' + aelt
2025-07-01 05:45:23.658
2025-07-01 05:45:23.664 # pump out diffs from after the synch point
2025-07-01 05:45:23.670 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:23.675
2025-07-01 05:45:23.680 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:23.685 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:23.690
2025-07-01 05:45:23.695 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:23.700 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:23.705 alo = 89, ahi = 1101
2025-07-01 05:45:23.710 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:23.715 blo = 89, bhi = 1101
2025-07-01 05:45:23.720
2025-07-01 05:45:23.726 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:23.732 g = []
2025-07-01 05:45:23.737 if alo < ahi:
2025-07-01 05:45:23.745 if blo < bhi:
2025-07-01 05:45:23.752 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:23.758 else:
2025-07-01 05:45:23.769 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:23.777 elif blo < bhi:
2025-07-01 05:45:23.789 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:23.798
2025-07-01 05:45:23.805 > yield from g
2025-07-01 05:45:23.811
2025-07-01 05:45:23.816 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:23.822 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:23.826
2025-07-01 05:45:23.832 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:23.840 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:23.846 alo = 89, ahi = 1101
2025-07-01 05:45:23.853 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:23.859 blo = 89, bhi = 1101
2025-07-01 05:45:23.866
2025-07-01 05:45:23.873 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:23.879 r"""
2025-07-01 05:45:23.888 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:23.900 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:23.912 synch point, and intraline difference marking is done on the
2025-07-01 05:45:23.922 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:23.930
2025-07-01 05:45:23.936 Example:
2025-07-01 05:45:23.944
2025-07-01 05:45:23.952 >>> d = Differ()
2025-07-01 05:45:23.959 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:23.966 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:23.972 >>> print(''.join(results), end="")
2025-07-01 05:45:23.979 - abcDefghiJkl
2025-07-01 05:45:23.990 + abcdefGhijkl
2025-07-01 05:45:24.002 """
2025-07-01 05:45:24.009
2025-07-01 05:45:24.014 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:24.022 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:24.028 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:24.035 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:24.042 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:24.048
2025-07-01 05:45:24.055 # search for the pair that matches best without being identical
2025-07-01 05:45:24.062 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:24.069 # on junk -- unless we have to)
2025-07-01 05:45:24.075 for j in range(blo, bhi):
2025-07-01 05:45:24.081 bj = b[j]
2025-07-01 05:45:24.087 cruncher.set_seq2(bj)
2025-07-01 05:45:24.093 for i in range(alo, ahi):
2025-07-01 05:45:24.105 ai = a[i]
2025-07-01 05:45:24.117 if ai == bj:
2025-07-01 05:45:24.127 if eqi is None:
2025-07-01 05:45:24.135 eqi, eqj = i, j
2025-07-01 05:45:24.142 continue
2025-07-01 05:45:24.149 cruncher.set_seq1(ai)
2025-07-01 05:45:24.156 # computing similarity is expensive, so use the quick
2025-07-01 05:45:24.163 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:24.169 # compares by a factor of 3.
2025-07-01 05:45:24.175 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:24.181 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:24.187 # of the computation is cached by cruncher
2025-07-01 05:45:24.197 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:24.209 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:24.217 cruncher.ratio() > best_ratio:
2025-07-01 05:45:24.223 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:24.229 if best_ratio < cutoff:
2025-07-01 05:45:24.235 # no non-identical "pretty close" pair
2025-07-01 05:45:24.240 if eqi is None:
2025-07-01 05:45:24.247 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:24.253 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:24.259 return
2025-07-01 05:45:24.265 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:24.272 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:24.278 else:
2025-07-01 05:45:24.285 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:24.291 eqi = None
2025-07-01 05:45:24.300
2025-07-01 05:45:24.308 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:24.315 # identical
2025-07-01 05:45:24.323
2025-07-01 05:45:24.335 # pump out diffs from before the synch point
2025-07-01 05:45:24.345 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:24.352
2025-07-01 05:45:24.361 # do intraline marking on the synch pair
2025-07-01 05:45:24.367 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:24.374 if eqi is None:
2025-07-01 05:45:24.382 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:24.390 atags = btags = ""
2025-07-01 05:45:24.396 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:24.403 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:24.409 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:24.415 if tag == 'replace':
2025-07-01 05:45:24.422 atags += '^' * la
2025-07-01 05:45:24.429 btags += '^' * lb
2025-07-01 05:45:24.434 elif tag == 'delete':
2025-07-01 05:45:24.440 atags += '-' * la
2025-07-01 05:45:24.447 elif tag == 'insert':
2025-07-01 05:45:24.456 btags += '+' * lb
2025-07-01 05:45:24.468 elif tag == 'equal':
2025-07-01 05:45:24.476 atags += ' ' * la
2025-07-01 05:45:24.485 btags += ' ' * lb
2025-07-01 05:45:24.492 else:
2025-07-01 05:45:24.499 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:24.504 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:24.510 else:
2025-07-01 05:45:24.521 # the synch pair is identical
2025-07-01 05:45:24.530 yield ' ' + aelt
2025-07-01 05:45:24.536
2025-07-01 05:45:24.543 # pump out diffs from after the synch point
2025-07-01 05:45:24.549 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:24.555
2025-07-01 05:45:24.567 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:24.577 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:24.584
2025-07-01 05:45:24.590 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:24.596 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:24.602 alo = 92, ahi = 1101
2025-07-01 05:45:24.608 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:24.614 blo = 92, bhi = 1101
2025-07-01 05:45:24.622
2025-07-01 05:45:24.630 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:24.636 g = []
2025-07-01 05:45:24.643 if alo < ahi:
2025-07-01 05:45:24.649 if blo < bhi:
2025-07-01 05:45:24.655 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:24.660 else:
2025-07-01 05:45:24.666 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:24.672 elif blo < bhi:
2025-07-01 05:45:24.681 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:24.690
2025-07-01 05:45:24.696 > yield from g
2025-07-01 05:45:24.702
2025-07-01 05:45:24.708 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:24.714 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:24.720
2025-07-01 05:45:24.726 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:24.732 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:24.738 alo = 92, ahi = 1101
2025-07-01 05:45:24.746 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:24.752 blo = 92, bhi = 1101
2025-07-01 05:45:24.758
2025-07-01 05:45:24.765 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:24.770 r"""
2025-07-01 05:45:24.776 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:24.782 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:24.787 synch point, and intraline difference marking is done on the
2025-07-01 05:45:24.793 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:24.799
2025-07-01 05:45:24.805 Example:
2025-07-01 05:45:24.810
2025-07-01 05:45:24.816 >>> d = Differ()
2025-07-01 05:45:24.822 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:24.828 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:24.833 >>> print(''.join(results), end="")
2025-07-01 05:45:24.839 - abcDefghiJkl
2025-07-01 05:45:24.850 + abcdefGhijkl
2025-07-01 05:45:24.862 """
2025-07-01 05:45:24.868
2025-07-01 05:45:24.874 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:24.879 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:24.886 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:24.891 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:24.898 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:24.905
2025-07-01 05:45:24.912 # search for the pair that matches best without being identical
2025-07-01 05:45:24.919 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:24.926 # on junk -- unless we have to)
2025-07-01 05:45:24.936 for j in range(blo, bhi):
2025-07-01 05:45:24.947 bj = b[j]
2025-07-01 05:45:24.954 cruncher.set_seq2(bj)
2025-07-01 05:45:24.960 for i in range(alo, ahi):
2025-07-01 05:45:24.965 ai = a[i]
2025-07-01 05:45:24.971 if ai == bj:
2025-07-01 05:45:24.978 if eqi is None:
2025-07-01 05:45:24.984 eqi, eqj = i, j
2025-07-01 05:45:24.990 continue
2025-07-01 05:45:25.002 cruncher.set_seq1(ai)
2025-07-01 05:45:25.012 # computing similarity is expensive, so use the quick
2025-07-01 05:45:25.024 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:25.037 # compares by a factor of 3.
2025-07-01 05:45:25.047 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:25.054 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:25.061 # of the computation is cached by cruncher
2025-07-01 05:45:25.066 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:25.071 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:25.077 cruncher.ratio() > best_ratio:
2025-07-01 05:45:25.083 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:25.088 if best_ratio < cutoff:
2025-07-01 05:45:25.094 # no non-identical "pretty close" pair
2025-07-01 05:45:25.099 if eqi is None:
2025-07-01 05:45:25.108 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:25.118 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:25.126 return
2025-07-01 05:45:25.132 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:25.138 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:25.147 else:
2025-07-01 05:45:25.158 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:25.166 eqi = None
2025-07-01 05:45:25.176
2025-07-01 05:45:25.183 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:25.191 # identical
2025-07-01 05:45:25.199
2025-07-01 05:45:25.205 # pump out diffs from before the synch point
2025-07-01 05:45:25.210 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:25.214
2025-07-01 05:45:25.219 # do intraline marking on the synch pair
2025-07-01 05:45:25.223 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:25.228 if eqi is None:
2025-07-01 05:45:25.233 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:25.238 atags = btags = ""
2025-07-01 05:45:25.247 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:25.257 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:25.266 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:25.275 if tag == 'replace':
2025-07-01 05:45:25.283 atags += '^' * la
2025-07-01 05:45:25.291 btags += '^' * lb
2025-07-01 05:45:25.298 elif tag == 'delete':
2025-07-01 05:45:25.304 atags += '-' * la
2025-07-01 05:45:25.311 elif tag == 'insert':
2025-07-01 05:45:25.322 btags += '+' * lb
2025-07-01 05:45:25.333 elif tag == 'equal':
2025-07-01 05:45:25.341 atags += ' ' * la
2025-07-01 05:45:25.348 btags += ' ' * lb
2025-07-01 05:45:25.355 else:
2025-07-01 05:45:25.363 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:25.373 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:25.381 else:
2025-07-01 05:45:25.389 # the synch pair is identical
2025-07-01 05:45:25.396 yield ' ' + aelt
2025-07-01 05:45:25.406
2025-07-01 05:45:25.418 # pump out diffs from after the synch point
2025-07-01 05:45:25.429 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:25.439
2025-07-01 05:45:25.447 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:25.453 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:25.459
2025-07-01 05:45:25.465 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:25.475 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:25.485 alo = 93, ahi = 1101
2025-07-01 05:45:25.494 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:25.501 blo = 93, bhi = 1101
2025-07-01 05:45:25.509
2025-07-01 05:45:25.517 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:25.528 g = []
2025-07-01 05:45:25.538 if alo < ahi:
2025-07-01 05:45:25.549 if blo < bhi:
2025-07-01 05:45:25.561 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:25.573 else:
2025-07-01 05:45:25.584 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:25.593 elif blo < bhi:
2025-07-01 05:45:25.603 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:25.614
2025-07-01 05:45:25.626 > yield from g
2025-07-01 05:45:25.636
2025-07-01 05:45:25.649 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:25.661 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:25.672
2025-07-01 05:45:25.680 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:25.691 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:25.703 alo = 93, ahi = 1101
2025-07-01 05:45:25.714 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:25.724 blo = 93, bhi = 1101
2025-07-01 05:45:25.734
2025-07-01 05:45:25.743 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:25.751 r"""
2025-07-01 05:45:25.763 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:25.773 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:25.781 synch point, and intraline difference marking is done on the
2025-07-01 05:45:25.788 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:25.794
2025-07-01 05:45:25.800 Example:
2025-07-01 05:45:25.806
2025-07-01 05:45:25.818 >>> d = Differ()
2025-07-01 05:45:25.829 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:25.840 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:25.850 >>> print(''.join(results), end="")
2025-07-01 05:45:25.860 - abcDefghiJkl
2025-07-01 05:45:25.883 + abcdefGhijkl
2025-07-01 05:45:25.908 """
2025-07-01 05:45:25.920
2025-07-01 05:45:25.927 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:25.933 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:25.940 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:25.946 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:25.953 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:25.959
2025-07-01 05:45:25.965 # search for the pair that matches best without being identical
2025-07-01 05:45:25.971 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:25.979 # on junk -- unless we have to)
2025-07-01 05:45:25.989 for j in range(blo, bhi):
2025-07-01 05:45:25.998 bj = b[j]
2025-07-01 05:45:26.007 cruncher.set_seq2(bj)
2025-07-01 05:45:26.013 for i in range(alo, ahi):
2025-07-01 05:45:26.019 ai = a[i]
2025-07-01 05:45:26.025 if ai == bj:
2025-07-01 05:45:26.031 if eqi is None:
2025-07-01 05:45:26.039 eqi, eqj = i, j
2025-07-01 05:45:26.048 continue
2025-07-01 05:45:26.060 cruncher.set_seq1(ai)
2025-07-01 05:45:26.070 # computing similarity is expensive, so use the quick
2025-07-01 05:45:26.077 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:26.083 # compares by a factor of 3.
2025-07-01 05:45:26.089 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:26.095 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:26.103 # of the computation is cached by cruncher
2025-07-01 05:45:26.114 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:26.122 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:26.129 cruncher.ratio() > best_ratio:
2025-07-01 05:45:26.136 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:26.142 if best_ratio < cutoff:
2025-07-01 05:45:26.149 # no non-identical "pretty close" pair
2025-07-01 05:45:26.156 if eqi is None:
2025-07-01 05:45:26.162 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:26.168 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:26.175 return
2025-07-01 05:45:26.183 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:26.196 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:26.204 else:
2025-07-01 05:45:26.210 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:26.215 eqi = None
2025-07-01 05:45:26.220
2025-07-01 05:45:26.225 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:26.230 # identical
2025-07-01 05:45:26.237
2025-07-01 05:45:26.244 # pump out diffs from before the synch point
2025-07-01 05:45:26.252 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:26.259
2025-07-01 05:45:26.266 # do intraline marking on the synch pair
2025-07-01 05:45:26.276 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:26.285 if eqi is None:
2025-07-01 05:45:26.293 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:26.299 atags = btags = ""
2025-07-01 05:45:26.305 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:26.310 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:26.316 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:26.322 if tag == 'replace':
2025-07-01 05:45:26.331 atags += '^' * la
2025-07-01 05:45:26.344 btags += '^' * lb
2025-07-01 05:45:26.356 elif tag == 'delete':
2025-07-01 05:45:26.369 atags += '-' * la
2025-07-01 05:45:26.379 elif tag == 'insert':
2025-07-01 05:45:26.389 btags += '+' * lb
2025-07-01 05:45:26.395 elif tag == 'equal':
2025-07-01 05:45:26.402 atags += ' ' * la
2025-07-01 05:45:26.410 btags += ' ' * lb
2025-07-01 05:45:26.416 else:
2025-07-01 05:45:26.422 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:26.431 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:26.438 else:
2025-07-01 05:45:26.445 # the synch pair is identical
2025-07-01 05:45:26.452 yield ' ' + aelt
2025-07-01 05:45:26.459
2025-07-01 05:45:26.471 # pump out diffs from after the synch point
2025-07-01 05:45:26.479 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:26.491
2025-07-01 05:45:26.501 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:26.513 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:26.522
2025-07-01 05:45:26.535 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:26.545 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:26.553 alo = 94, ahi = 1101
2025-07-01 05:45:26.563 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:26.574 blo = 94, bhi = 1101
2025-07-01 05:45:26.583
2025-07-01 05:45:26.593 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:26.607 g = []
2025-07-01 05:45:26.618 if alo < ahi:
2025-07-01 05:45:26.625 if blo < bhi:
2025-07-01 05:45:26.631 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:26.636 else:
2025-07-01 05:45:26.642 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:26.647 elif blo < bhi:
2025-07-01 05:45:26.652 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:26.659
2025-07-01 05:45:26.669 > yield from g
2025-07-01 05:45:26.678
2025-07-01 05:45:26.685 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:26.694 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:26.710
2025-07-01 05:45:26.719 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:26.732 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:26.740 alo = 94, ahi = 1101
2025-07-01 05:45:26.747 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:26.753 blo = 94, bhi = 1101
2025-07-01 05:45:26.758
2025-07-01 05:45:26.764 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:26.771 r"""
2025-07-01 05:45:26.778 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:26.785 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:26.792 synch point, and intraline difference marking is done on the
2025-07-01 05:45:26.799 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:26.809
2025-07-01 05:45:26.817 Example:
2025-07-01 05:45:26.824
2025-07-01 05:45:26.831 >>> d = Differ()
2025-07-01 05:45:26.840 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:26.849 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:26.856 >>> print(''.join(results), end="")
2025-07-01 05:45:26.863 - abcDefghiJkl
2025-07-01 05:45:26.875 + abcdefGhijkl
2025-07-01 05:45:26.886 """
2025-07-01 05:45:26.897
2025-07-01 05:45:26.908 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:26.916 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:26.924 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:26.930 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:26.937 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:26.946
2025-07-01 05:45:26.952 # search for the pair that matches best without being identical
2025-07-01 05:45:26.959 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:26.967 # on junk -- unless we have to)
2025-07-01 05:45:26.973 for j in range(blo, bhi):
2025-07-01 05:45:26.978 bj = b[j]
2025-07-01 05:45:26.986 cruncher.set_seq2(bj)
2025-07-01 05:45:26.993 for i in range(alo, ahi):
2025-07-01 05:45:26.999 ai = a[i]
2025-07-01 05:45:27.004 if ai == bj:
2025-07-01 05:45:27.011 if eqi is None:
2025-07-01 05:45:27.018 eqi, eqj = i, j
2025-07-01 05:45:27.026 continue
2025-07-01 05:45:27.033 cruncher.set_seq1(ai)
2025-07-01 05:45:27.040 # computing similarity is expensive, so use the quick
2025-07-01 05:45:27.047 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:27.055 # compares by a factor of 3.
2025-07-01 05:45:27.065 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:27.073 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:27.080 # of the computation is cached by cruncher
2025-07-01 05:45:27.085 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:27.090 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:27.100 cruncher.ratio() > best_ratio:
2025-07-01 05:45:27.106 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:27.112 if best_ratio < cutoff:
2025-07-01 05:45:27.118 # no non-identical "pretty close" pair
2025-07-01 05:45:27.123 if eqi is None:
2025-07-01 05:45:27.133 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:27.143 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:27.156 return
2025-07-01 05:45:27.164 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:27.171 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:27.177 else:
2025-07-01 05:45:27.183 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:27.188 eqi = None
2025-07-01 05:45:27.194
2025-07-01 05:45:27.202 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:27.211 # identical
2025-07-01 05:45:27.217
2025-07-01 05:45:27.223 # pump out diffs from before the synch point
2025-07-01 05:45:27.228 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:27.240
2025-07-01 05:45:27.248 # do intraline marking on the synch pair
2025-07-01 05:45:27.255 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:27.262 if eqi is None:
2025-07-01 05:45:27.268 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:27.274 atags = btags = ""
2025-07-01 05:45:27.279 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:27.285 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:27.292 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:27.299 if tag == 'replace':
2025-07-01 05:45:27.305 atags += '^' * la
2025-07-01 05:45:27.312 btags += '^' * lb
2025-07-01 05:45:27.320 elif tag == 'delete':
2025-07-01 05:45:27.327 atags += '-' * la
2025-07-01 05:45:27.333 elif tag == 'insert':
2025-07-01 05:45:27.344 btags += '+' * lb
2025-07-01 05:45:27.354 elif tag == 'equal':
2025-07-01 05:45:27.360 atags += ' ' * la
2025-07-01 05:45:27.367 btags += ' ' * lb
2025-07-01 05:45:27.374 else:
2025-07-01 05:45:27.385 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:27.395 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:27.406 else:
2025-07-01 05:45:27.414 # the synch pair is identical
2025-07-01 05:45:27.423 yield ' ' + aelt
2025-07-01 05:45:27.430
2025-07-01 05:45:27.452 # pump out diffs from after the synch point
2025-07-01 05:45:27.460 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:27.466
2025-07-01 05:45:27.472 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:27.478 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:27.486
2025-07-01 05:45:27.494 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:27.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:27.506 alo = 95, ahi = 1101
2025-07-01 05:45:27.513 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:27.520 blo = 95, bhi = 1101
2025-07-01 05:45:27.526
2025-07-01 05:45:27.538 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:27.551 g = []
2025-07-01 05:45:27.561 if alo < ahi:
2025-07-01 05:45:27.569 if blo < bhi:
2025-07-01 05:45:27.575 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:27.580 else:
2025-07-01 05:45:27.585 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:27.590 elif blo < bhi:
2025-07-01 05:45:27.596 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:27.604
2025-07-01 05:45:27.614 > yield from g
2025-07-01 05:45:27.622
2025-07-01 05:45:27.630 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:27.638 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:27.650
2025-07-01 05:45:27.659 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:27.666 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:27.670 alo = 95, ahi = 1101
2025-07-01 05:45:27.679 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:27.686 blo = 95, bhi = 1101
2025-07-01 05:45:27.694
2025-07-01 05:45:27.700 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:27.706 r"""
2025-07-01 05:45:27.711 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:27.719 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:27.730 synch point, and intraline difference marking is done on the
2025-07-01 05:45:27.738 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:27.749
2025-07-01 05:45:27.760 Example:
2025-07-01 05:45:27.768
2025-07-01 05:45:27.774 >>> d = Differ()
2025-07-01 05:45:27.780 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:27.786 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:27.793 >>> print(''.join(results), end="")
2025-07-01 05:45:27.799 - abcDefghiJkl
2025-07-01 05:45:27.811 + abcdefGhijkl
2025-07-01 05:45:27.821 """
2025-07-01 05:45:27.827
2025-07-01 05:45:27.835 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:27.843 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:27.853 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:27.860 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:27.873 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:27.887
2025-07-01 05:45:27.896 # search for the pair that matches best without being identical
2025-07-01 05:45:27.903 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:27.911 # on junk -- unless we have to)
2025-07-01 05:45:27.916 for j in range(blo, bhi):
2025-07-01 05:45:27.921 bj = b[j]
2025-07-01 05:45:27.926 cruncher.set_seq2(bj)
2025-07-01 05:45:27.931 for i in range(alo, ahi):
2025-07-01 05:45:27.936 ai = a[i]
2025-07-01 05:45:27.941 if ai == bj:
2025-07-01 05:45:27.946 if eqi is None:
2025-07-01 05:45:27.953 eqi, eqj = i, j
2025-07-01 05:45:27.966 continue
2025-07-01 05:45:27.973 cruncher.set_seq1(ai)
2025-07-01 05:45:27.979 # computing similarity is expensive, so use the quick
2025-07-01 05:45:27.985 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:27.991 # compares by a factor of 3.
2025-07-01 05:45:28.001 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:28.014 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:28.023 # of the computation is cached by cruncher
2025-07-01 05:45:28.033 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:28.044 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:28.057 cruncher.ratio() > best_ratio:
2025-07-01 05:45:28.066 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:28.074 if best_ratio < cutoff:
2025-07-01 05:45:28.081 # no non-identical "pretty close" pair
2025-07-01 05:45:28.089 if eqi is None:
2025-07-01 05:45:28.098 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:28.106 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:28.112 return
2025-07-01 05:45:28.119 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:28.124 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:28.129 else:
2025-07-01 05:45:28.133 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:28.138 eqi = None
2025-07-01 05:45:28.143
2025-07-01 05:45:28.151 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:28.161 # identical
2025-07-01 05:45:28.169
2025-07-01 05:45:28.181 # pump out diffs from before the synch point
2025-07-01 05:45:28.191 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:28.203
2025-07-01 05:45:28.212 # do intraline marking on the synch pair
2025-07-01 05:45:28.221 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:28.227 if eqi is None:
2025-07-01 05:45:28.234 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:28.245 atags = btags = ""
2025-07-01 05:45:28.256 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:28.270 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:28.282 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:28.293 if tag == 'replace':
2025-07-01 05:45:28.303 atags += '^' * la
2025-07-01 05:45:28.315 btags += '^' * lb
2025-07-01 05:45:28.324 elif tag == 'delete':
2025-07-01 05:45:28.332 atags += '-' * la
2025-07-01 05:45:28.338 elif tag == 'insert':
2025-07-01 05:45:28.344 btags += '+' * lb
2025-07-01 05:45:28.350 elif tag == 'equal':
2025-07-01 05:45:28.356 atags += ' ' * la
2025-07-01 05:45:28.363 btags += ' ' * lb
2025-07-01 05:45:28.371 else:
2025-07-01 05:45:28.383 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:28.391 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:28.399 else:
2025-07-01 05:45:28.407 # the synch pair is identical
2025-07-01 05:45:28.418 yield ' ' + aelt
2025-07-01 05:45:28.427
2025-07-01 05:45:28.435 # pump out diffs from after the synch point
2025-07-01 05:45:28.447 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:28.459
2025-07-01 05:45:28.471 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:28.480 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:28.489
2025-07-01 05:45:28.496 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:28.509 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:28.519 alo = 96, ahi = 1101
2025-07-01 05:45:28.527 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:28.533 blo = 96, bhi = 1101
2025-07-01 05:45:28.539
2025-07-01 05:45:28.545 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:28.551 g = []
2025-07-01 05:45:28.557 if alo < ahi:
2025-07-01 05:45:28.563 if blo < bhi:
2025-07-01 05:45:28.569 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:28.574 else:
2025-07-01 05:45:28.580 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:28.587 elif blo < bhi:
2025-07-01 05:45:28.597 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:28.605
2025-07-01 05:45:28.612 > yield from g
2025-07-01 05:45:28.619
2025-07-01 05:45:28.624 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:28.631 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:28.636
2025-07-01 05:45:28.642 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:28.649 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:28.655 alo = 96, ahi = 1101
2025-07-01 05:45:28.662 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:28.671 blo = 96, bhi = 1101
2025-07-01 05:45:28.678
2025-07-01 05:45:28.685 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:28.691 r"""
2025-07-01 05:45:28.698 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:28.707 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:28.719 synch point, and intraline difference marking is done on the
2025-07-01 05:45:28.731 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:28.745
2025-07-01 05:45:28.758 Example:
2025-07-01 05:45:28.769
2025-07-01 05:45:28.778 >>> d = Differ()
2025-07-01 05:45:28.788 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:28.796 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:28.802 >>> print(''.join(results), end="")
2025-07-01 05:45:28.809 - abcDefghiJkl
2025-07-01 05:45:28.820 + abcdefGhijkl
2025-07-01 05:45:28.836 """
2025-07-01 05:45:28.846
2025-07-01 05:45:28.856 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:28.865 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:28.873 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:28.881 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:28.892 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:28.902
2025-07-01 05:45:28.912 # search for the pair that matches best without being identical
2025-07-01 05:45:28.922 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:28.933 # on junk -- unless we have to)
2025-07-01 05:45:28.943 for j in range(blo, bhi):
2025-07-01 05:45:28.951 bj = b[j]
2025-07-01 05:45:28.959 cruncher.set_seq2(bj)
2025-07-01 05:45:28.970 for i in range(alo, ahi):
2025-07-01 05:45:28.978 ai = a[i]
2025-07-01 05:45:28.985 if ai == bj:
2025-07-01 05:45:28.991 if eqi is None:
2025-07-01 05:45:29.005 eqi, eqj = i, j
2025-07-01 05:45:29.015 continue
2025-07-01 05:45:29.023 cruncher.set_seq1(ai)
2025-07-01 05:45:29.030 # computing similarity is expensive, so use the quick
2025-07-01 05:45:29.037 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:29.043 # compares by a factor of 3.
2025-07-01 05:45:29.049 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:29.055 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:29.061 # of the computation is cached by cruncher
2025-07-01 05:45:29.066 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:29.072 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:29.079 cruncher.ratio() > best_ratio:
2025-07-01 05:45:29.084 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:29.090 if best_ratio < cutoff:
2025-07-01 05:45:29.101 # no non-identical "pretty close" pair
2025-07-01 05:45:29.112 if eqi is None:
2025-07-01 05:45:29.123 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:29.133 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:29.141 return
2025-07-01 05:45:29.149 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:29.155 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:29.161 else:
2025-07-01 05:45:29.168 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:29.177 eqi = None
2025-07-01 05:45:29.190
2025-07-01 05:45:29.200 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:29.210 # identical
2025-07-01 05:45:29.223
2025-07-01 05:45:29.233 # pump out diffs from before the synch point
2025-07-01 05:45:29.245 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:29.256
2025-07-01 05:45:29.267 # do intraline marking on the synch pair
2025-07-01 05:45:29.277 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:29.285 if eqi is None:
2025-07-01 05:45:29.296 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:29.308 atags = btags = ""
2025-07-01 05:45:29.316 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:29.327 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:29.337 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:29.349 if tag == 'replace':
2025-07-01 05:45:29.362 atags += '^' * la
2025-07-01 05:45:29.373 btags += '^' * lb
2025-07-01 05:45:29.380 elif tag == 'delete':
2025-07-01 05:45:29.386 atags += '-' * la
2025-07-01 05:45:29.393 elif tag == 'insert':
2025-07-01 05:45:29.399 btags += '+' * lb
2025-07-01 05:45:29.404 elif tag == 'equal':
2025-07-01 05:45:29.410 atags += ' ' * la
2025-07-01 05:45:29.421 btags += ' ' * lb
2025-07-01 05:45:29.430 else:
2025-07-01 05:45:29.437 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:29.443 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:29.450 else:
2025-07-01 05:45:29.460 # the synch pair is identical
2025-07-01 05:45:29.469 yield ' ' + aelt
2025-07-01 05:45:29.477
2025-07-01 05:45:29.484 # pump out diffs from after the synch point
2025-07-01 05:45:29.494 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:29.506
2025-07-01 05:45:29.514 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:29.521 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:29.527
2025-07-01 05:45:29.534 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:29.545 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:29.553 alo = 97, ahi = 1101
2025-07-01 05:45:29.559 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:29.566 blo = 97, bhi = 1101
2025-07-01 05:45:29.573
2025-07-01 05:45:29.583 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:29.591 g = []
2025-07-01 05:45:29.599 if alo < ahi:
2025-07-01 05:45:29.610 if blo < bhi:
2025-07-01 05:45:29.618 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:29.628 else:
2025-07-01 05:45:29.636 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:29.642 elif blo < bhi:
2025-07-01 05:45:29.648 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:29.654
2025-07-01 05:45:29.660 > yield from g
2025-07-01 05:45:29.668
2025-07-01 05:45:29.677 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:29.690 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:29.702
2025-07-01 05:45:29.714 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:29.724 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:29.731 alo = 97, ahi = 1101
2025-07-01 05:45:29.737 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:29.746 blo = 97, bhi = 1101
2025-07-01 05:45:29.758
2025-07-01 05:45:29.767 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:29.775 r"""
2025-07-01 05:45:29.783 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:29.790 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:29.796 synch point, and intraline difference marking is done on the
2025-07-01 05:45:29.801 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:29.807
2025-07-01 05:45:29.818 Example:
2025-07-01 05:45:29.830
2025-07-01 05:45:29.839 >>> d = Differ()
2025-07-01 05:45:29.847 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:29.855 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:29.862 >>> print(''.join(results), end="")
2025-07-01 05:45:29.868 - abcDefghiJkl
2025-07-01 05:45:29.885 + abcdefGhijkl
2025-07-01 05:45:29.903 """
2025-07-01 05:45:29.910
2025-07-01 05:45:29.916 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:29.923 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:29.928 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:29.941 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:29.954 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:29.962
2025-07-01 05:45:29.970 # search for the pair that matches best without being identical
2025-07-01 05:45:29.981 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:29.990 # on junk -- unless we have to)
2025-07-01 05:45:29.998 for j in range(blo, bhi):
2025-07-01 05:45:30.006 bj = b[j]
2025-07-01 05:45:30.013 cruncher.set_seq2(bj)
2025-07-01 05:45:30.019 for i in range(alo, ahi):
2025-07-01 05:45:30.025 ai = a[i]
2025-07-01 05:45:30.034 if ai == bj:
2025-07-01 05:45:30.046 if eqi is None:
2025-07-01 05:45:30.054 eqi, eqj = i, j
2025-07-01 05:45:30.062 continue
2025-07-01 05:45:30.069 cruncher.set_seq1(ai)
2025-07-01 05:45:30.076 # computing similarity is expensive, so use the quick
2025-07-01 05:45:30.082 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:30.089 # compares by a factor of 3.
2025-07-01 05:45:30.096 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:30.105 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:30.117 # of the computation is cached by cruncher
2025-07-01 05:45:30.130 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:30.141 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:30.154 cruncher.ratio() > best_ratio:
2025-07-01 05:45:30.166 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:30.178 if best_ratio < cutoff:
2025-07-01 05:45:30.189 # no non-identical "pretty close" pair
2025-07-01 05:45:30.197 if eqi is None:
2025-07-01 05:45:30.206 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:30.219 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:30.230 return
2025-07-01 05:45:30.239 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:30.247 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:30.253 else:
2025-07-01 05:45:30.266 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:30.279 eqi = None
2025-07-01 05:45:30.288
2025-07-01 05:45:30.300 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:30.313 # identical
2025-07-01 05:45:30.323
2025-07-01 05:45:30.332 # pump out diffs from before the synch point
2025-07-01 05:45:30.346 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:30.358
2025-07-01 05:45:30.368 # do intraline marking on the synch pair
2025-07-01 05:45:30.376 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:30.382 if eqi is None:
2025-07-01 05:45:30.388 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:30.396 atags = btags = ""
2025-07-01 05:45:30.404 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:30.412 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:30.420 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:30.427 if tag == 'replace':
2025-07-01 05:45:30.435 atags += '^' * la
2025-07-01 05:45:30.442 btags += '^' * lb
2025-07-01 05:45:30.453 elif tag == 'delete':
2025-07-01 05:45:30.462 atags += '-' * la
2025-07-01 05:45:30.469 elif tag == 'insert':
2025-07-01 05:45:30.478 btags += '+' * lb
2025-07-01 05:45:30.489 elif tag == 'equal':
2025-07-01 05:45:30.498 atags += ' ' * la
2025-07-01 05:45:30.505 btags += ' ' * lb
2025-07-01 05:45:30.515 else:
2025-07-01 05:45:30.527 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:30.535 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:30.541 else:
2025-07-01 05:45:30.547 # the synch pair is identical
2025-07-01 05:45:30.551 yield ' ' + aelt
2025-07-01 05:45:30.556
2025-07-01 05:45:30.561 # pump out diffs from after the synch point
2025-07-01 05:45:30.566 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:30.575
2025-07-01 05:45:30.586 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:30.595 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:30.603
2025-07-01 05:45:30.615 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:30.625 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:30.632 alo = 98, ahi = 1101
2025-07-01 05:45:30.638 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:30.649 blo = 98, bhi = 1101
2025-07-01 05:45:30.661
2025-07-01 05:45:30.672 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:30.682 g = []
2025-07-01 05:45:30.691 if alo < ahi:
2025-07-01 05:45:30.697 if blo < bhi:
2025-07-01 05:45:30.703 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:30.710 else:
2025-07-01 05:45:30.719 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:30.726 elif blo < bhi:
2025-07-01 05:45:30.732 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:30.737
2025-07-01 05:45:30.742 > yield from g
2025-07-01 05:45:30.746
2025-07-01 05:45:30.755 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:30.762 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:30.769
2025-07-01 05:45:30.776 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:30.787 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:30.794 alo = 98, ahi = 1101
2025-07-01 05:45:30.803 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:30.811 blo = 98, bhi = 1101
2025-07-01 05:45:30.817
2025-07-01 05:45:30.828 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:30.840 r"""
2025-07-01 05:45:30.849 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:30.856 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:30.862 synch point, and intraline difference marking is done on the
2025-07-01 05:45:30.869 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:30.875
2025-07-01 05:45:30.881 Example:
2025-07-01 05:45:30.888
2025-07-01 05:45:30.896 >>> d = Differ()
2025-07-01 05:45:30.904 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:30.911 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:30.918 >>> print(''.join(results), end="")
2025-07-01 05:45:30.924 - abcDefghiJkl
2025-07-01 05:45:30.940 + abcdefGhijkl
2025-07-01 05:45:30.964 """
2025-07-01 05:45:30.974
2025-07-01 05:45:30.985 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:30.996 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:31.004 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:31.011 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:31.018 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:31.024
2025-07-01 05:45:31.031 # search for the pair that matches best without being identical
2025-07-01 05:45:31.037 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:31.042 # on junk -- unless we have to)
2025-07-01 05:45:31.049 for j in range(blo, bhi):
2025-07-01 05:45:31.055 bj = b[j]
2025-07-01 05:45:31.061 cruncher.set_seq2(bj)
2025-07-01 05:45:31.068 for i in range(alo, ahi):
2025-07-01 05:45:31.075 ai = a[i]
2025-07-01 05:45:31.082 if ai == bj:
2025-07-01 05:45:31.094 if eqi is None:
2025-07-01 05:45:31.103 eqi, eqj = i, j
2025-07-01 05:45:31.110 continue
2025-07-01 05:45:31.121 cruncher.set_seq1(ai)
2025-07-01 05:45:31.130 # computing similarity is expensive, so use the quick
2025-07-01 05:45:31.138 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:31.143 # compares by a factor of 3.
2025-07-01 05:45:31.149 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:31.156 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:31.162 # of the computation is cached by cruncher
2025-07-01 05:45:31.172 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:31.180 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:31.188 cruncher.ratio() > best_ratio:
2025-07-01 05:45:31.199 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:31.207 if best_ratio < cutoff:
2025-07-01 05:45:31.216 # no non-identical "pretty close" pair
2025-07-01 05:45:31.226 if eqi is None:
2025-07-01 05:45:31.234 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:31.247 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:31.254 return
2025-07-01 05:45:31.260 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:31.266 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:31.271 else:
2025-07-01 05:45:31.275 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:31.280 eqi = None
2025-07-01 05:45:31.286
2025-07-01 05:45:31.296 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:31.306 # identical
2025-07-01 05:45:31.313
2025-07-01 05:45:31.320 # pump out diffs from before the synch point
2025-07-01 05:45:31.327 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:31.334
2025-07-01 05:45:31.341 # do intraline marking on the synch pair
2025-07-01 05:45:31.349 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:31.360 if eqi is None:
2025-07-01 05:45:31.371 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:31.381 atags = btags = ""
2025-07-01 05:45:31.387 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:31.395 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:31.407 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:31.414 if tag == 'replace':
2025-07-01 05:45:31.421 atags += '^' * la
2025-07-01 05:45:31.428 btags += '^' * lb
2025-07-01 05:45:31.436 elif tag == 'delete':
2025-07-01 05:45:31.443 atags += '-' * la
2025-07-01 05:45:31.449 elif tag == 'insert':
2025-07-01 05:45:31.455 btags += '+' * lb
2025-07-01 05:45:31.461 elif tag == 'equal':
2025-07-01 05:45:31.466 atags += ' ' * la
2025-07-01 05:45:31.473 btags += ' ' * lb
2025-07-01 05:45:31.479 else:
2025-07-01 05:45:31.485 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:31.491 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:31.499 else:
2025-07-01 05:45:31.505 # the synch pair is identical
2025-07-01 05:45:31.515 yield ' ' + aelt
2025-07-01 05:45:31.521
2025-07-01 05:45:31.533 # pump out diffs from after the synch point
2025-07-01 05:45:31.542 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:31.548
2025-07-01 05:45:31.555 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:31.562 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:31.567
2025-07-01 05:45:31.573 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:31.581 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:31.586 alo = 99, ahi = 1101
2025-07-01 05:45:31.593 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:31.600 blo = 99, bhi = 1101
2025-07-01 05:45:31.607
2025-07-01 05:45:31.616 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:31.624 g = []
2025-07-01 05:45:31.631 if alo < ahi:
2025-07-01 05:45:31.638 if blo < bhi:
2025-07-01 05:45:31.646 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:31.654 else:
2025-07-01 05:45:31.667 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:31.678 elif blo < bhi:
2025-07-01 05:45:31.690 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:31.700
2025-07-01 05:45:31.708 > yield from g
2025-07-01 05:45:31.715
2025-07-01 05:45:31.723 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:31.733 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:31.742
2025-07-01 05:45:31.748 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:31.755 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:31.761 alo = 99, ahi = 1101
2025-07-01 05:45:31.768 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:31.774 blo = 99, bhi = 1101
2025-07-01 05:45:31.779
2025-07-01 05:45:31.785 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:31.790 r"""
2025-07-01 05:45:31.801 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:31.810 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:31.817 synch point, and intraline difference marking is done on the
2025-07-01 05:45:31.823 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:31.830
2025-07-01 05:45:31.844 Example:
2025-07-01 05:45:31.852
2025-07-01 05:45:31.858 >>> d = Differ()
2025-07-01 05:45:31.864 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:31.871 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:31.877 >>> print(''.join(results), end="")
2025-07-01 05:45:31.883 - abcDefghiJkl
2025-07-01 05:45:31.894 + abcdefGhijkl
2025-07-01 05:45:31.906 """
2025-07-01 05:45:31.914
2025-07-01 05:45:31.921 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:31.927 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:31.934 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:31.940 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:31.946 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:31.951
2025-07-01 05:45:31.956 # search for the pair that matches best without being identical
2025-07-01 05:45:31.962 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:31.968 # on junk -- unless we have to)
2025-07-01 05:45:31.973 for j in range(blo, bhi):
2025-07-01 05:45:31.978 bj = b[j]
2025-07-01 05:45:31.985 cruncher.set_seq2(bj)
2025-07-01 05:45:31.993 for i in range(alo, ahi):
2025-07-01 05:45:32.000 ai = a[i]
2025-07-01 05:45:32.007 if ai == bj:
2025-07-01 05:45:32.016 if eqi is None:
2025-07-01 05:45:32.023 eqi, eqj = i, j
2025-07-01 05:45:32.032 continue
2025-07-01 05:45:32.037 cruncher.set_seq1(ai)
2025-07-01 05:45:32.043 # computing similarity is expensive, so use the quick
2025-07-01 05:45:32.049 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:32.056 # compares by a factor of 3.
2025-07-01 05:45:32.062 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:32.071 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:32.078 # of the computation is cached by cruncher
2025-07-01 05:45:32.084 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:32.089 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:32.094 cruncher.ratio() > best_ratio:
2025-07-01 05:45:32.100 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:32.107 if best_ratio < cutoff:
2025-07-01 05:45:32.115 # no non-identical "pretty close" pair
2025-07-01 05:45:32.126 if eqi is None:
2025-07-01 05:45:32.134 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:32.141 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:32.153 return
2025-07-01 05:45:32.160 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:32.171 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:32.182 else:
2025-07-01 05:45:32.189 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:32.195 eqi = None
2025-07-01 05:45:32.202
2025-07-01 05:45:32.207 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:32.213 # identical
2025-07-01 05:45:32.219
2025-07-01 05:45:32.225 # pump out diffs from before the synch point
2025-07-01 05:45:32.235 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:32.241
2025-07-01 05:45:32.251 # do intraline marking on the synch pair
2025-07-01 05:45:32.261 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:32.268 if eqi is None:
2025-07-01 05:45:32.274 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:32.279 atags = btags = ""
2025-07-01 05:45:32.284 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:32.289 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:32.294 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:32.299 if tag == 'replace':
2025-07-01 05:45:32.305 atags += '^' * la
2025-07-01 05:45:32.311 btags += '^' * lb
2025-07-01 05:45:32.319 elif tag == 'delete':
2025-07-01 05:45:32.324 atags += '-' * la
2025-07-01 05:45:32.330 elif tag == 'insert':
2025-07-01 05:45:32.334 btags += '+' * lb
2025-07-01 05:45:32.339 elif tag == 'equal':
2025-07-01 05:45:32.344 atags += ' ' * la
2025-07-01 05:45:32.349 btags += ' ' * lb
2025-07-01 05:45:32.354 else:
2025-07-01 05:45:32.359 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:32.363 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:32.368 else:
2025-07-01 05:45:32.373 # the synch pair is identical
2025-07-01 05:45:32.378 yield ' ' + aelt
2025-07-01 05:45:32.383
2025-07-01 05:45:32.389 # pump out diffs from after the synch point
2025-07-01 05:45:32.396 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:32.403
2025-07-01 05:45:32.408 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:32.412 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:32.417
2025-07-01 05:45:32.423 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:32.428 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:32.434 alo = 100, ahi = 1101
2025-07-01 05:45:32.441 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:32.453 blo = 100, bhi = 1101
2025-07-01 05:45:32.463
2025-07-01 05:45:32.472 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:32.479 g = []
2025-07-01 05:45:32.486 if alo < ahi:
2025-07-01 05:45:32.498 if blo < bhi:
2025-07-01 05:45:32.507 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:32.513 else:
2025-07-01 05:45:32.520 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:32.526 elif blo < bhi:
2025-07-01 05:45:32.534 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:32.540
2025-07-01 05:45:32.546 > yield from g
2025-07-01 05:45:32.553
2025-07-01 05:45:32.560 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:32.566 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:32.573
2025-07-01 05:45:32.580 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:32.587 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:32.594 alo = 100, ahi = 1101
2025-07-01 05:45:32.600 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:32.606 blo = 100, bhi = 1101
2025-07-01 05:45:32.612
2025-07-01 05:45:32.619 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:32.625 r"""
2025-07-01 05:45:32.630 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:32.636 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:32.643 synch point, and intraline difference marking is done on the
2025-07-01 05:45:32.649 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:32.654
2025-07-01 05:45:32.660 Example:
2025-07-01 05:45:32.666
2025-07-01 05:45:32.671 >>> d = Differ()
2025-07-01 05:45:32.678 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:32.687 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:32.699 >>> print(''.join(results), end="")
2025-07-01 05:45:32.708 - abcDefghiJkl
2025-07-01 05:45:32.721 + abcdefGhijkl
2025-07-01 05:45:32.733 """
2025-07-01 05:45:32.739
2025-07-01 05:45:32.745 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:32.751 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:32.758 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:32.769 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:32.781 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:32.789
2025-07-01 05:45:32.796 # search for the pair that matches best without being identical
2025-07-01 05:45:32.802 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:32.809 # on junk -- unless we have to)
2025-07-01 05:45:32.816 for j in range(blo, bhi):
2025-07-01 05:45:32.821 bj = b[j]
2025-07-01 05:45:32.828 cruncher.set_seq2(bj)
2025-07-01 05:45:32.837 for i in range(alo, ahi):
2025-07-01 05:45:32.847 ai = a[i]
2025-07-01 05:45:32.853 if ai == bj:
2025-07-01 05:45:32.859 if eqi is None:
2025-07-01 05:45:32.866 eqi, eqj = i, j
2025-07-01 05:45:32.872 continue
2025-07-01 05:45:32.879 cruncher.set_seq1(ai)
2025-07-01 05:45:32.886 # computing similarity is expensive, so use the quick
2025-07-01 05:45:32.893 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:32.899 # compares by a factor of 3.
2025-07-01 05:45:32.907 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:32.915 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:32.926 # of the computation is cached by cruncher
2025-07-01 05:45:32.933 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:32.940 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:32.946 cruncher.ratio() > best_ratio:
2025-07-01 05:45:32.952 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:32.958 if best_ratio < cutoff:
2025-07-01 05:45:32.964 # no non-identical "pretty close" pair
2025-07-01 05:45:32.970 if eqi is None:
2025-07-01 05:45:32.977 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:32.984 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:32.990 return
2025-07-01 05:45:32.995 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:33.003 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:33.011 else:
2025-07-01 05:45:33.019 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:33.025 eqi = None
2025-07-01 05:45:33.031
2025-07-01 05:45:33.036 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:33.041 # identical
2025-07-01 05:45:33.046
2025-07-01 05:45:33.051 # pump out diffs from before the synch point
2025-07-01 05:45:33.056 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:33.061
2025-07-01 05:45:33.067 # do intraline marking on the synch pair
2025-07-01 05:45:33.073 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:33.080 if eqi is None:
2025-07-01 05:45:33.087 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:33.094 atags = btags = ""
2025-07-01 05:45:33.101 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:33.108 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:33.114 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:33.121 if tag == 'replace':
2025-07-01 05:45:33.131 atags += '^' * la
2025-07-01 05:45:33.137 btags += '^' * lb
2025-07-01 05:45:33.143 elif tag == 'delete':
2025-07-01 05:45:33.149 atags += '-' * la
2025-07-01 05:45:33.156 elif tag == 'insert':
2025-07-01 05:45:33.162 btags += '+' * lb
2025-07-01 05:45:33.168 elif tag == 'equal':
2025-07-01 05:45:33.174 atags += ' ' * la
2025-07-01 05:45:33.180 btags += ' ' * lb
2025-07-01 05:45:33.186 else:
2025-07-01 05:45:33.192 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:33.198 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:33.205 else:
2025-07-01 05:45:33.210 # the synch pair is identical
2025-07-01 05:45:33.216 yield ' ' + aelt
2025-07-01 05:45:33.222
2025-07-01 05:45:33.227 # pump out diffs from after the synch point
2025-07-01 05:45:33.233 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:33.239
2025-07-01 05:45:33.244 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:33.250 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:33.254
2025-07-01 05:45:33.260 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:33.272 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:33.278 alo = 101, ahi = 1101
2025-07-01 05:45:33.286 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:33.294 blo = 101, bhi = 1101
2025-07-01 05:45:33.301
2025-07-01 05:45:33.308 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:33.319 g = []
2025-07-01 05:45:33.329 if alo < ahi:
2025-07-01 05:45:33.336 if blo < bhi:
2025-07-01 05:45:33.342 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:33.347 else:
2025-07-01 05:45:33.354 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:33.361 elif blo < bhi:
2025-07-01 05:45:33.367 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:33.375
2025-07-01 05:45:33.381 > yield from g
2025-07-01 05:45:33.387
2025-07-01 05:45:33.393 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:33.399 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:33.405
2025-07-01 05:45:33.411 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:33.419 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:33.424 alo = 101, ahi = 1101
2025-07-01 05:45:33.438 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:33.448 blo = 101, bhi = 1101
2025-07-01 05:45:33.458
2025-07-01 05:45:33.466 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:33.472 r"""
2025-07-01 05:45:33.480 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:33.488 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:33.495 synch point, and intraline difference marking is done on the
2025-07-01 05:45:33.501 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:33.508
2025-07-01 05:45:33.514 Example:
2025-07-01 05:45:33.521
2025-07-01 05:45:33.528 >>> d = Differ()
2025-07-01 05:45:33.535 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:33.544 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:33.555 >>> print(''.join(results), end="")
2025-07-01 05:45:33.564 - abcDefghiJkl
2025-07-01 05:45:33.576 + abcdefGhijkl
2025-07-01 05:45:33.588 """
2025-07-01 05:45:33.595
2025-07-01 05:45:33.603 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:33.610 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:33.616 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:33.621 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:33.626 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:33.633
2025-07-01 05:45:33.643 # search for the pair that matches best without being identical
2025-07-01 05:45:33.652 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:33.658 # on junk -- unless we have to)
2025-07-01 05:45:33.664 for j in range(blo, bhi):
2025-07-01 05:45:33.670 bj = b[j]
2025-07-01 05:45:33.676 cruncher.set_seq2(bj)
2025-07-01 05:45:33.683 for i in range(alo, ahi):
2025-07-01 05:45:33.689 ai = a[i]
2025-07-01 05:45:33.696 if ai == bj:
2025-07-01 05:45:33.703 if eqi is None:
2025-07-01 05:45:33.710 eqi, eqj = i, j
2025-07-01 05:45:33.719 continue
2025-07-01 05:45:33.731 cruncher.set_seq1(ai)
2025-07-01 05:45:33.739 # computing similarity is expensive, so use the quick
2025-07-01 05:45:33.745 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:33.750 # compares by a factor of 3.
2025-07-01 05:45:33.755 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:33.760 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:33.764 # of the computation is cached by cruncher
2025-07-01 05:45:33.769 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:33.774 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:33.779 cruncher.ratio() > best_ratio:
2025-07-01 05:45:33.785 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:33.790 if best_ratio < cutoff:
2025-07-01 05:45:33.796 # no non-identical "pretty close" pair
2025-07-01 05:45:33.802 if eqi is None:
2025-07-01 05:45:33.811 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:33.820 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:33.828 return
2025-07-01 05:45:33.834 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:33.846 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:33.854 else:
2025-07-01 05:45:33.861 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:33.869 eqi = None
2025-07-01 05:45:33.876
2025-07-01 05:45:33.883 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:33.889 # identical
2025-07-01 05:45:33.894
2025-07-01 05:45:33.899 # pump out diffs from before the synch point
2025-07-01 05:45:33.904 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:33.911
2025-07-01 05:45:33.919 # do intraline marking on the synch pair
2025-07-01 05:45:33.928 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:33.940 if eqi is None:
2025-07-01 05:45:33.949 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:33.961 atags = btags = ""
2025-07-01 05:45:33.971 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:33.980 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:33.987 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:33.994 if tag == 'replace':
2025-07-01 05:45:34.000 atags += '^' * la
2025-07-01 05:45:34.007 btags += '^' * lb
2025-07-01 05:45:34.012 elif tag == 'delete':
2025-07-01 05:45:34.019 atags += '-' * la
2025-07-01 05:45:34.027 elif tag == 'insert':
2025-07-01 05:45:34.038 btags += '+' * lb
2025-07-01 05:45:34.046 elif tag == 'equal':
2025-07-01 05:45:34.054 atags += ' ' * la
2025-07-01 05:45:34.066 btags += ' ' * lb
2025-07-01 05:45:34.076 else:
2025-07-01 05:45:34.083 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:34.090 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:34.097 else:
2025-07-01 05:45:34.103 # the synch pair is identical
2025-07-01 05:45:34.109 yield ' ' + aelt
2025-07-01 05:45:34.116
2025-07-01 05:45:34.124 # pump out diffs from after the synch point
2025-07-01 05:45:34.131 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:34.137
2025-07-01 05:45:34.143 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:34.150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:34.156
2025-07-01 05:45:34.162 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:34.171 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:34.183 alo = 102, ahi = 1101
2025-07-01 05:45:34.192 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:34.199 blo = 102, bhi = 1101
2025-07-01 05:45:34.208
2025-07-01 05:45:34.217 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:34.223 g = []
2025-07-01 05:45:34.231 if alo < ahi:
2025-07-01 05:45:34.241 if blo < bhi:
2025-07-01 05:45:34.251 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:34.258 else:
2025-07-01 05:45:34.269 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:34.280 elif blo < bhi:
2025-07-01 05:45:34.291 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:34.299
2025-07-01 05:45:34.306 > yield from g
2025-07-01 05:45:34.311
2025-07-01 05:45:34.317 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:34.323 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:34.329
2025-07-01 05:45:34.335 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:34.343 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:34.351 alo = 102, ahi = 1101
2025-07-01 05:45:34.363 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:34.372 blo = 102, bhi = 1101
2025-07-01 05:45:34.379
2025-07-01 05:45:34.385 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:34.391 r"""
2025-07-01 05:45:34.397 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:34.403 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:34.407 synch point, and intraline difference marking is done on the
2025-07-01 05:45:34.413 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:34.419
2025-07-01 05:45:34.427 Example:
2025-07-01 05:45:34.434
2025-07-01 05:45:34.441 >>> d = Differ()
2025-07-01 05:45:34.446 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:34.451 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:34.455 >>> print(''.join(results), end="")
2025-07-01 05:45:34.460 - abcDefghiJkl
2025-07-01 05:45:34.471 + abcdefGhijkl
2025-07-01 05:45:34.482 """
2025-07-01 05:45:34.487
2025-07-01 05:45:34.492 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:34.497 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:34.505 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:34.516 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:34.528 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:34.537
2025-07-01 05:45:34.549 # search for the pair that matches best without being identical
2025-07-01 05:45:34.558 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:34.569 # on junk -- unless we have to)
2025-07-01 05:45:34.577 for j in range(blo, bhi):
2025-07-01 05:45:34.589 bj = b[j]
2025-07-01 05:45:34.599 cruncher.set_seq2(bj)
2025-07-01 05:45:34.611 for i in range(alo, ahi):
2025-07-01 05:45:34.621 ai = a[i]
2025-07-01 05:45:34.633 if ai == bj:
2025-07-01 05:45:34.639 if eqi is None:
2025-07-01 05:45:34.646 eqi, eqj = i, j
2025-07-01 05:45:34.660 continue
2025-07-01 05:45:34.672 cruncher.set_seq1(ai)
2025-07-01 05:45:34.681 # computing similarity is expensive, so use the quick
2025-07-01 05:45:34.691 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:34.697 # compares by a factor of 3.
2025-07-01 05:45:34.703 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:34.709 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:34.715 # of the computation is cached by cruncher
2025-07-01 05:45:34.721 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:34.732 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:34.744 cruncher.ratio() > best_ratio:
2025-07-01 05:45:34.755 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:34.764 if best_ratio < cutoff:
2025-07-01 05:45:34.773 # no non-identical "pretty close" pair
2025-07-01 05:45:34.779 if eqi is None:
2025-07-01 05:45:34.786 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:34.793 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:34.800 return
2025-07-01 05:45:34.808 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:34.815 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:34.822 else:
2025-07-01 05:45:34.829 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:34.836 eqi = None
2025-07-01 05:45:34.843
2025-07-01 05:45:34.849 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:34.854 # identical
2025-07-01 05:45:34.860
2025-07-01 05:45:34.866 # pump out diffs from before the synch point
2025-07-01 05:45:34.876 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:34.885
2025-07-01 05:45:34.893 # do intraline marking on the synch pair
2025-07-01 05:45:34.899 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:34.904 if eqi is None:
2025-07-01 05:45:34.910 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:34.914 atags = btags = ""
2025-07-01 05:45:34.919 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:34.924 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:34.929 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:34.935 if tag == 'replace':
2025-07-01 05:45:34.941 atags += '^' * la
2025-07-01 05:45:34.950 btags += '^' * lb
2025-07-01 05:45:34.957 elif tag == 'delete':
2025-07-01 05:45:34.963 atags += '-' * la
2025-07-01 05:45:34.968 elif tag == 'insert':
2025-07-01 05:45:34.974 btags += '+' * lb
2025-07-01 05:45:34.980 elif tag == 'equal':
2025-07-01 05:45:34.987 atags += ' ' * la
2025-07-01 05:45:34.996 btags += ' ' * lb
2025-07-01 05:45:35.006 else:
2025-07-01 05:45:35.013 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:35.020 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:35.027 else:
2025-07-01 05:45:35.034 # the synch pair is identical
2025-07-01 05:45:35.044 yield ' ' + aelt
2025-07-01 05:45:35.054
2025-07-01 05:45:35.062 # pump out diffs from after the synch point
2025-07-01 05:45:35.069 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:35.075
2025-07-01 05:45:35.081 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:35.093 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:35.102
2025-07-01 05:45:35.113 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:35.124 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:35.131 alo = 103, ahi = 1101
2025-07-01 05:45:35.139 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:35.146 blo = 103, bhi = 1101
2025-07-01 05:45:35.151
2025-07-01 05:45:35.156 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:35.163 g = []
2025-07-01 05:45:35.174 if alo < ahi:
2025-07-01 05:45:35.187 if blo < bhi:
2025-07-01 05:45:35.197 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:35.208 else:
2025-07-01 05:45:35.219 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:35.229 elif blo < bhi:
2025-07-01 05:45:35.240 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:35.245
2025-07-01 05:45:35.250 > yield from g
2025-07-01 05:45:35.255
2025-07-01 05:45:35.260 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:35.265 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:35.270
2025-07-01 05:45:35.275 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:35.282 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:35.288 alo = 103, ahi = 1101
2025-07-01 05:45:35.294 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:35.302 blo = 103, bhi = 1101
2025-07-01 05:45:35.314
2025-07-01 05:45:35.323 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:35.330 r"""
2025-07-01 05:45:35.338 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:35.344 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:35.351 synch point, and intraline difference marking is done on the
2025-07-01 05:45:35.356 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:35.360
2025-07-01 05:45:35.366 Example:
2025-07-01 05:45:35.374
2025-07-01 05:45:35.381 >>> d = Differ()
2025-07-01 05:45:35.387 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:35.393 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:35.398 >>> print(''.join(results), end="")
2025-07-01 05:45:35.403 - abcDefghiJkl
2025-07-01 05:45:35.419 + abcdefGhijkl
2025-07-01 05:45:35.432 """
2025-07-01 05:45:35.438
2025-07-01 05:45:35.445 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:35.451 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:35.459 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:35.469 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:35.477 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:35.483
2025-07-01 05:45:35.494 # search for the pair that matches best without being identical
2025-07-01 05:45:35.505 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:35.517 # on junk -- unless we have to)
2025-07-01 05:45:35.530 for j in range(blo, bhi):
2025-07-01 05:45:35.543 bj = b[j]
2025-07-01 05:45:35.553 cruncher.set_seq2(bj)
2025-07-01 05:45:35.562 for i in range(alo, ahi):
2025-07-01 05:45:35.566 ai = a[i]
2025-07-01 05:45:35.575 if ai == bj:
2025-07-01 05:45:35.584 if eqi is None:
2025-07-01 05:45:35.590 eqi, eqj = i, j
2025-07-01 05:45:35.598 continue
2025-07-01 05:45:35.609 cruncher.set_seq1(ai)
2025-07-01 05:45:35.619 # computing similarity is expensive, so use the quick
2025-07-01 05:45:35.627 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:35.635 # compares by a factor of 3.
2025-07-01 05:45:35.641 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:35.646 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:35.652 # of the computation is cached by cruncher
2025-07-01 05:45:35.658 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:35.666 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:35.677 cruncher.ratio() > best_ratio:
2025-07-01 05:45:35.687 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:35.695 if best_ratio < cutoff:
2025-07-01 05:45:35.704 # no non-identical "pretty close" pair
2025-07-01 05:45:35.712 if eqi is None:
2025-07-01 05:45:35.720 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:35.726 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:35.736 return
2025-07-01 05:45:35.745 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:35.753 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:35.760 else:
2025-07-01 05:45:35.766 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:35.772 eqi = None
2025-07-01 05:45:35.779
2025-07-01 05:45:35.790 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:35.801 # identical
2025-07-01 05:45:35.812
2025-07-01 05:45:35.822 # pump out diffs from before the synch point
2025-07-01 05:45:35.831 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:35.838
2025-07-01 05:45:35.849 # do intraline marking on the synch pair
2025-07-01 05:45:35.860 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:35.869 if eqi is None:
2025-07-01 05:45:35.876 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:35.882 atags = btags = ""
2025-07-01 05:45:35.888 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:35.894 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:35.899 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:35.907 if tag == 'replace':
2025-07-01 05:45:35.916 atags += '^' * la
2025-07-01 05:45:35.924 btags += '^' * lb
2025-07-01 05:45:35.931 elif tag == 'delete':
2025-07-01 05:45:35.941 atags += '-' * la
2025-07-01 05:45:35.949 elif tag == 'insert':
2025-07-01 05:45:35.955 btags += '+' * lb
2025-07-01 05:45:35.961 elif tag == 'equal':
2025-07-01 05:45:35.967 atags += ' ' * la
2025-07-01 05:45:35.976 btags += ' ' * lb
2025-07-01 05:45:35.982 else:
2025-07-01 05:45:35.988 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:35.994 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:35.999 else:
2025-07-01 05:45:36.004 # the synch pair is identical
2025-07-01 05:45:36.009 yield ' ' + aelt
2025-07-01 05:45:36.013
2025-07-01 05:45:36.018 # pump out diffs from after the synch point
2025-07-01 05:45:36.024 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:36.028
2025-07-01 05:45:36.034 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:36.040 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:36.046
2025-07-01 05:45:36.057 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:36.065 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:36.071 alo = 104, ahi = 1101
2025-07-01 05:45:36.078 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:36.087 blo = 104, bhi = 1101
2025-07-01 05:45:36.099
2025-07-01 05:45:36.109 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:36.116 g = []
2025-07-01 05:45:36.122 if alo < ahi:
2025-07-01 05:45:36.128 if blo < bhi:
2025-07-01 05:45:36.140 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:36.149 else:
2025-07-01 05:45:36.157 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:36.164 elif blo < bhi:
2025-07-01 05:45:36.171 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:36.177
2025-07-01 05:45:36.188 > yield from g
2025-07-01 05:45:36.200
2025-07-01 05:45:36.209 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:36.218 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:36.226
2025-07-01 05:45:36.233 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:36.241 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:36.247 alo = 104, ahi = 1101
2025-07-01 05:45:36.257 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:36.269 blo = 104, bhi = 1101
2025-07-01 05:45:36.277
2025-07-01 05:45:36.285 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:36.297 r"""
2025-07-01 05:45:36.310 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:36.320 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:36.328 synch point, and intraline difference marking is done on the
2025-07-01 05:45:36.335 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:36.346
2025-07-01 05:45:36.355 Example:
2025-07-01 05:45:36.361
2025-07-01 05:45:36.369 >>> d = Differ()
2025-07-01 05:45:36.374 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:36.380 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:36.388 >>> print(''.join(results), end="")
2025-07-01 05:45:36.394 - abcDefghiJkl
2025-07-01 05:45:36.412 + abcdefGhijkl
2025-07-01 05:45:36.423 """
2025-07-01 05:45:36.428
2025-07-01 05:45:36.433 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:36.439 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:36.450 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:36.463 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:36.473 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:36.481
2025-07-01 05:45:36.489 # search for the pair that matches best without being identical
2025-07-01 05:45:36.496 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:36.502 # on junk -- unless we have to)
2025-07-01 05:45:36.514 for j in range(blo, bhi):
2025-07-01 05:45:36.526 bj = b[j]
2025-07-01 05:45:36.537 cruncher.set_seq2(bj)
2025-07-01 05:45:36.547 for i in range(alo, ahi):
2025-07-01 05:45:36.558 ai = a[i]
2025-07-01 05:45:36.567 if ai == bj:
2025-07-01 05:45:36.574 if eqi is None:
2025-07-01 05:45:36.580 eqi, eqj = i, j
2025-07-01 05:45:36.586 continue
2025-07-01 05:45:36.591 cruncher.set_seq1(ai)
2025-07-01 05:45:36.602 # computing similarity is expensive, so use the quick
2025-07-01 05:45:36.614 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:36.624 # compares by a factor of 3.
2025-07-01 05:45:36.631 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:36.641 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:36.654 # of the computation is cached by cruncher
2025-07-01 05:45:36.668 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:36.676 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:36.685 cruncher.ratio() > best_ratio:
2025-07-01 05:45:36.691 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:36.698 if best_ratio < cutoff:
2025-07-01 05:45:36.704 # no non-identical "pretty close" pair
2025-07-01 05:45:36.710 if eqi is None:
2025-07-01 05:45:36.716 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:36.721 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:36.726 return
2025-07-01 05:45:36.732 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:36.739 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:36.746 else:
2025-07-01 05:45:36.753 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:36.759 eqi = None
2025-07-01 05:45:36.766
2025-07-01 05:45:36.771 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:36.777 # identical
2025-07-01 05:45:36.782
2025-07-01 05:45:36.788 # pump out diffs from before the synch point
2025-07-01 05:45:36.795 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:36.801
2025-07-01 05:45:36.808 # do intraline marking on the synch pair
2025-07-01 05:45:36.815 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:36.822 if eqi is None:
2025-07-01 05:45:36.829 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:36.835 atags = btags = ""
2025-07-01 05:45:36.842 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:36.851 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:36.862 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:36.870 if tag == 'replace':
2025-07-01 05:45:36.877 atags += '^' * la
2025-07-01 05:45:36.882 btags += '^' * lb
2025-07-01 05:45:36.887 elif tag == 'delete':
2025-07-01 05:45:36.892 atags += '-' * la
2025-07-01 05:45:36.896 elif tag == 'insert':
2025-07-01 05:45:36.901 btags += '+' * lb
2025-07-01 05:45:36.905 elif tag == 'equal':
2025-07-01 05:45:36.910 atags += ' ' * la
2025-07-01 05:45:36.917 btags += ' ' * lb
2025-07-01 05:45:36.922 else:
2025-07-01 05:45:36.927 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:36.933 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:36.939 else:
2025-07-01 05:45:36.946 # the synch pair is identical
2025-07-01 05:45:36.952 yield ' ' + aelt
2025-07-01 05:45:36.958
2025-07-01 05:45:36.964 # pump out diffs from after the synch point
2025-07-01 05:45:36.970 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:36.975
2025-07-01 05:45:36.980 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:36.986 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:36.995
2025-07-01 05:45:37.004 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:37.011 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:37.018 alo = 105, ahi = 1101
2025-07-01 05:45:37.027 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:37.034 blo = 105, bhi = 1101
2025-07-01 05:45:37.040
2025-07-01 05:45:37.048 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:37.055 g = []
2025-07-01 05:45:37.061 if alo < ahi:
2025-07-01 05:45:37.067 if blo < bhi:
2025-07-01 05:45:37.072 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:37.078 else:
2025-07-01 05:45:37.083 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:37.088 elif blo < bhi:
2025-07-01 05:45:37.093 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:37.099
2025-07-01 05:45:37.104 > yield from g
2025-07-01 05:45:37.110
2025-07-01 05:45:37.120 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:37.131 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:37.140
2025-07-01 05:45:37.149 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:37.157 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:37.164 alo = 105, ahi = 1101
2025-07-01 05:45:37.175 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:37.187 blo = 105, bhi = 1101
2025-07-01 05:45:37.198
2025-07-01 05:45:37.211 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:37.223 r"""
2025-07-01 05:45:37.235 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:37.245 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:37.257 synch point, and intraline difference marking is done on the
2025-07-01 05:45:37.267 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:37.276
2025-07-01 05:45:37.284 Example:
2025-07-01 05:45:37.292
2025-07-01 05:45:37.298 >>> d = Differ()
2025-07-01 05:45:37.303 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:37.309 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:37.315 >>> print(''.join(results), end="")
2025-07-01 05:45:37.320 - abcDefghiJkl
2025-07-01 05:45:37.331 + abcdefGhijkl
2025-07-01 05:45:37.344 """
2025-07-01 05:45:37.350
2025-07-01 05:45:37.361 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:37.372 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:37.381 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:37.388 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:37.395 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:37.401
2025-07-01 05:45:37.406 # search for the pair that matches best without being identical
2025-07-01 05:45:37.413 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:37.418 # on junk -- unless we have to)
2025-07-01 05:45:37.424 for j in range(blo, bhi):
2025-07-01 05:45:37.433 bj = b[j]
2025-07-01 05:45:37.443 cruncher.set_seq2(bj)
2025-07-01 05:45:37.451 for i in range(alo, ahi):
2025-07-01 05:45:37.458 ai = a[i]
2025-07-01 05:45:37.468 if ai == bj:
2025-07-01 05:45:37.477 if eqi is None:
2025-07-01 05:45:37.489 eqi, eqj = i, j
2025-07-01 05:45:37.500 continue
2025-07-01 05:45:37.512 cruncher.set_seq1(ai)
2025-07-01 05:45:37.524 # computing similarity is expensive, so use the quick
2025-07-01 05:45:37.534 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:37.546 # compares by a factor of 3.
2025-07-01 05:45:37.559 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:37.569 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:37.580 # of the computation is cached by cruncher
2025-07-01 05:45:37.590 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:37.598 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:37.607 cruncher.ratio() > best_ratio:
2025-07-01 05:45:37.618 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:37.626 if best_ratio < cutoff:
2025-07-01 05:45:37.636 # no non-identical "pretty close" pair
2025-07-01 05:45:37.647 if eqi is None:
2025-07-01 05:45:37.655 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:37.664 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:37.672 return
2025-07-01 05:45:37.681 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:37.689 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:37.696 else:
2025-07-01 05:45:37.702 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:37.708 eqi = None
2025-07-01 05:45:37.713
2025-07-01 05:45:37.719 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:37.731 # identical
2025-07-01 05:45:37.741
2025-07-01 05:45:37.749 # pump out diffs from before the synch point
2025-07-01 05:45:37.756 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:37.761
2025-07-01 05:45:37.767 # do intraline marking on the synch pair
2025-07-01 05:45:37.775 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:37.787 if eqi is None:
2025-07-01 05:45:37.797 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:37.809 atags = btags = ""
2025-07-01 05:45:37.822 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:37.834 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:37.847 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:37.859 if tag == 'replace':
2025-07-01 05:45:37.869 atags += '^' * la
2025-07-01 05:45:37.877 btags += '^' * lb
2025-07-01 05:45:37.885 elif tag == 'delete':
2025-07-01 05:45:37.891 atags += '-' * la
2025-07-01 05:45:37.898 elif tag == 'insert':
2025-07-01 05:45:37.904 btags += '+' * lb
2025-07-01 05:45:37.917 elif tag == 'equal':
2025-07-01 05:45:37.926 atags += ' ' * la
2025-07-01 05:45:37.933 btags += ' ' * lb
2025-07-01 05:45:37.939 else:
2025-07-01 05:45:37.945 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:37.953 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:37.962 else:
2025-07-01 05:45:37.970 # the synch pair is identical
2025-07-01 05:45:37.977 yield ' ' + aelt
2025-07-01 05:45:37.983
2025-07-01 05:45:37.989 # pump out diffs from after the synch point
2025-07-01 05:45:37.994 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:38.000
2025-07-01 05:45:38.006 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:38.012 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:38.020
2025-07-01 05:45:38.032 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:38.041 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:38.047 alo = 106, ahi = 1101
2025-07-01 05:45:38.054 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:38.060 blo = 106, bhi = 1101
2025-07-01 05:45:38.067
2025-07-01 05:45:38.078 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:38.090 g = []
2025-07-01 05:45:38.100 if alo < ahi:
2025-07-01 05:45:38.111 if blo < bhi:
2025-07-01 05:45:38.121 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:38.128 else:
2025-07-01 05:45:38.135 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:38.141 elif blo < bhi:
2025-07-01 05:45:38.148 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:38.155
2025-07-01 05:45:38.165 > yield from g
2025-07-01 05:45:38.177
2025-07-01 05:45:38.187 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:38.196 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:38.208
2025-07-01 05:45:38.221 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:38.231 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:38.238 alo = 106, ahi = 1101
2025-07-01 05:45:38.247 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:38.255 blo = 106, bhi = 1101
2025-07-01 05:45:38.266
2025-07-01 05:45:38.276 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:38.288 r"""
2025-07-01 05:45:38.300 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:38.309 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:38.318 synch point, and intraline difference marking is done on the
2025-07-01 05:45:38.329 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:38.340
2025-07-01 05:45:38.348 Example:
2025-07-01 05:45:38.355
2025-07-01 05:45:38.363 >>> d = Differ()
2025-07-01 05:45:38.373 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:38.383 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:38.397 >>> print(''.join(results), end="")
2025-07-01 05:45:38.407 - abcDefghiJkl
2025-07-01 05:45:38.423 + abcdefGhijkl
2025-07-01 05:45:38.444 """
2025-07-01 05:45:38.451
2025-07-01 05:45:38.458 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:38.468 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:38.480 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:38.489 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:38.498 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:38.509
2025-07-01 05:45:38.520 # search for the pair that matches best without being identical
2025-07-01 05:45:38.529 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:38.535 # on junk -- unless we have to)
2025-07-01 05:45:38.542 for j in range(blo, bhi):
2025-07-01 05:45:38.548 bj = b[j]
2025-07-01 05:45:38.555 cruncher.set_seq2(bj)
2025-07-01 05:45:38.564 for i in range(alo, ahi):
2025-07-01 05:45:38.572 ai = a[i]
2025-07-01 05:45:38.581 if ai == bj:
2025-07-01 05:45:38.588 if eqi is None:
2025-07-01 05:45:38.595 eqi, eqj = i, j
2025-07-01 05:45:38.600 continue
2025-07-01 05:45:38.606 cruncher.set_seq1(ai)
2025-07-01 05:45:38.616 # computing similarity is expensive, so use the quick
2025-07-01 05:45:38.623 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:38.631 # compares by a factor of 3.
2025-07-01 05:45:38.638 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:38.648 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:38.659 # of the computation is cached by cruncher
2025-07-01 05:45:38.670 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:38.677 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:38.685 cruncher.ratio() > best_ratio:
2025-07-01 05:45:38.694 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:38.706 if best_ratio < cutoff:
2025-07-01 05:45:38.718 # no non-identical "pretty close" pair
2025-07-01 05:45:38.726 if eqi is None:
2025-07-01 05:45:38.732 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:38.738 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:38.747 return
2025-07-01 05:45:38.759 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:38.768 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:38.775 else:
2025-07-01 05:45:38.783 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:38.793 eqi = None
2025-07-01 05:45:38.803
2025-07-01 05:45:38.808 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:38.814 # identical
2025-07-01 05:45:38.821
2025-07-01 05:45:38.827 # pump out diffs from before the synch point
2025-07-01 05:45:38.832 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:38.838
2025-07-01 05:45:38.849 # do intraline marking on the synch pair
2025-07-01 05:45:38.859 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:38.866 if eqi is None:
2025-07-01 05:45:38.876 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:38.884 atags = btags = ""
2025-07-01 05:45:38.894 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:38.905 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:38.916 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:38.923 if tag == 'replace':
2025-07-01 05:45:38.933 atags += '^' * la
2025-07-01 05:45:38.946 btags += '^' * lb
2025-07-01 05:45:38.957 elif tag == 'delete':
2025-07-01 05:45:38.966 atags += '-' * la
2025-07-01 05:45:38.977 elif tag == 'insert':
2025-07-01 05:45:38.986 btags += '+' * lb
2025-07-01 05:45:38.995 elif tag == 'equal':
2025-07-01 05:45:39.004 atags += ' ' * la
2025-07-01 05:45:39.016 btags += ' ' * lb
2025-07-01 05:45:39.025 else:
2025-07-01 05:45:39.035 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:39.045 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:39.053 else:
2025-07-01 05:45:39.059 # the synch pair is identical
2025-07-01 05:45:39.065 yield ' ' + aelt
2025-07-01 05:45:39.072
2025-07-01 05:45:39.082 # pump out diffs from after the synch point
2025-07-01 05:45:39.093 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:39.102
2025-07-01 05:45:39.110 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:39.117 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:39.123
2025-07-01 05:45:39.129 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:39.141 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:39.151 alo = 107, ahi = 1101
2025-07-01 05:45:39.158 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:39.165 blo = 107, bhi = 1101
2025-07-01 05:45:39.172
2025-07-01 05:45:39.179 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:39.191 g = []
2025-07-01 05:45:39.200 if alo < ahi:
2025-07-01 05:45:39.207 if blo < bhi:
2025-07-01 05:45:39.213 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:39.219 else:
2025-07-01 05:45:39.225 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:39.232 elif blo < bhi:
2025-07-01 05:45:39.239 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:39.246
2025-07-01 05:45:39.257 > yield from g
2025-07-01 05:45:39.267
2025-07-01 05:45:39.275 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:39.282 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:39.288
2025-07-01 05:45:39.294 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:39.302 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:39.308 alo = 107, ahi = 1101
2025-07-01 05:45:39.316 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:39.323 blo = 107, bhi = 1101
2025-07-01 05:45:39.330
2025-07-01 05:45:39.341 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:39.350 r"""
2025-07-01 05:45:39.357 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:39.364 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:39.371 synch point, and intraline difference marking is done on the
2025-07-01 05:45:39.379 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:39.386
2025-07-01 05:45:39.393 Example:
2025-07-01 05:45:39.400
2025-07-01 05:45:39.407 >>> d = Differ()
2025-07-01 05:45:39.415 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:39.423 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:39.430 >>> print(''.join(results), end="")
2025-07-01 05:45:39.438 - abcDefghiJkl
2025-07-01 05:45:39.455 + abcdefGhijkl
2025-07-01 05:45:39.471 """
2025-07-01 05:45:39.482
2025-07-01 05:45:39.489 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:39.495 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:39.501 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:39.507 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:39.515 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:39.525
2025-07-01 05:45:39.536 # search for the pair that matches best without being identical
2025-07-01 05:45:39.549 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:39.561 # on junk -- unless we have to)
2025-07-01 05:45:39.571 for j in range(blo, bhi):
2025-07-01 05:45:39.579 bj = b[j]
2025-07-01 05:45:39.593 cruncher.set_seq2(bj)
2025-07-01 05:45:39.601 for i in range(alo, ahi):
2025-07-01 05:45:39.607 ai = a[i]
2025-07-01 05:45:39.615 if ai == bj:
2025-07-01 05:45:39.621 if eqi is None:
2025-07-01 05:45:39.627 eqi, eqj = i, j
2025-07-01 05:45:39.633 continue
2025-07-01 05:45:39.638 cruncher.set_seq1(ai)
2025-07-01 05:45:39.642 # computing similarity is expensive, so use the quick
2025-07-01 05:45:39.647 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:39.655 # compares by a factor of 3.
2025-07-01 05:45:39.662 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:39.668 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:39.674 # of the computation is cached by cruncher
2025-07-01 05:45:39.679 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:39.687 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:39.697 cruncher.ratio() > best_ratio:
2025-07-01 05:45:39.709 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:39.721 if best_ratio < cutoff:
2025-07-01 05:45:39.732 # no non-identical "pretty close" pair
2025-07-01 05:45:39.743 if eqi is None:
2025-07-01 05:45:39.753 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:39.763 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:39.770 return
2025-07-01 05:45:39.776 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:39.782 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:39.788 else:
2025-07-01 05:45:39.794 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:39.804 eqi = None
2025-07-01 05:45:39.813
2025-07-01 05:45:39.821 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:39.826 # identical
2025-07-01 05:45:39.835
2025-07-01 05:45:39.843 # pump out diffs from before the synch point
2025-07-01 05:45:39.851 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:39.857
2025-07-01 05:45:39.863 # do intraline marking on the synch pair
2025-07-01 05:45:39.869 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:39.874 if eqi is None:
2025-07-01 05:45:39.881 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:39.887 atags = btags = ""
2025-07-01 05:45:39.892 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:39.897 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:39.903 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:39.908 if tag == 'replace':
2025-07-01 05:45:39.914 atags += '^' * la
2025-07-01 05:45:39.920 btags += '^' * lb
2025-07-01 05:45:39.928 elif tag == 'delete':
2025-07-01 05:45:39.935 atags += '-' * la
2025-07-01 05:45:39.942 elif tag == 'insert':
2025-07-01 05:45:39.951 btags += '+' * lb
2025-07-01 05:45:39.960 elif tag == 'equal':
2025-07-01 05:45:39.966 atags += ' ' * la
2025-07-01 05:45:39.975 btags += ' ' * lb
2025-07-01 05:45:39.985 else:
2025-07-01 05:45:39.992 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:39.997 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:40.004 else:
2025-07-01 05:45:40.014 # the synch pair is identical
2025-07-01 05:45:40.021 yield ' ' + aelt
2025-07-01 05:45:40.027
2025-07-01 05:45:40.032 # pump out diffs from after the synch point
2025-07-01 05:45:40.036 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:40.041
2025-07-01 05:45:40.045 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:40.050 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:40.054
2025-07-01 05:45:40.058 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:40.063 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:40.068 alo = 108, ahi = 1101
2025-07-01 05:45:40.073 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:40.077 blo = 108, bhi = 1101
2025-07-01 05:45:40.082
2025-07-01 05:45:40.086 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:40.091 g = []
2025-07-01 05:45:40.096 if alo < ahi:
2025-07-01 05:45:40.102 if blo < bhi:
2025-07-01 05:45:40.111 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:40.123 else:
2025-07-01 05:45:40.132 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:40.140 elif blo < bhi:
2025-07-01 05:45:40.147 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:40.155
2025-07-01 05:45:40.165 > yield from g
2025-07-01 05:45:40.176
2025-07-01 05:45:40.185 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:40.192 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:40.199
2025-07-01 05:45:40.207 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:40.218 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:40.226 alo = 108, ahi = 1101
2025-07-01 05:45:40.233 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:40.239 blo = 108, bhi = 1101
2025-07-01 05:45:40.245
2025-07-01 05:45:40.250 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:40.256 r"""
2025-07-01 05:45:40.262 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:40.268 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:40.275 synch point, and intraline difference marking is done on the
2025-07-01 05:45:40.286 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:40.297
2025-07-01 05:45:40.306 Example:
2025-07-01 05:45:40.312
2025-07-01 05:45:40.318 >>> d = Differ()
2025-07-01 05:45:40.325 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:40.332 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:40.338 >>> print(''.join(results), end="")
2025-07-01 05:45:40.345 - abcDefghiJkl
2025-07-01 05:45:40.358 + abcdefGhijkl
2025-07-01 05:45:40.371 """
2025-07-01 05:45:40.377
2025-07-01 05:45:40.387 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:40.394 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:40.404 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:40.412 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:40.424 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:40.435
2025-07-01 05:45:40.447 # search for the pair that matches best without being identical
2025-07-01 05:45:40.459 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:40.468 # on junk -- unless we have to)
2025-07-01 05:45:40.475 for j in range(blo, bhi):
2025-07-01 05:45:40.482 bj = b[j]
2025-07-01 05:45:40.493 cruncher.set_seq2(bj)
2025-07-01 05:45:40.504 for i in range(alo, ahi):
2025-07-01 05:45:40.514 ai = a[i]
2025-07-01 05:45:40.522 if ai == bj:
2025-07-01 05:45:40.530 if eqi is None:
2025-07-01 05:45:40.540 eqi, eqj = i, j
2025-07-01 05:45:40.549 continue
2025-07-01 05:45:40.558 cruncher.set_seq1(ai)
2025-07-01 05:45:40.569 # computing similarity is expensive, so use the quick
2025-07-01 05:45:40.579 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:40.587 # compares by a factor of 3.
2025-07-01 05:45:40.595 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:40.602 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:40.610 # of the computation is cached by cruncher
2025-07-01 05:45:40.619 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:40.626 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:40.635 cruncher.ratio() > best_ratio:
2025-07-01 05:45:40.644 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:40.651 if best_ratio < cutoff:
2025-07-01 05:45:40.658 # no non-identical "pretty close" pair
2025-07-01 05:45:40.664 if eqi is None:
2025-07-01 05:45:40.671 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:40.682 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:40.690 return
2025-07-01 05:45:40.696 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:40.702 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:40.712 else:
2025-07-01 05:45:40.720 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:40.727 eqi = None
2025-07-01 05:45:40.734
2025-07-01 05:45:40.744 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:40.754 # identical
2025-07-01 05:45:40.763
2025-07-01 05:45:40.770 # pump out diffs from before the synch point
2025-07-01 05:45:40.777 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:40.783
2025-07-01 05:45:40.796 # do intraline marking on the synch pair
2025-07-01 05:45:40.803 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:40.815 if eqi is None:
2025-07-01 05:45:40.823 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:40.833 atags = btags = ""
2025-07-01 05:45:40.846 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:40.859 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:40.868 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:40.875 if tag == 'replace':
2025-07-01 05:45:40.883 atags += '^' * la
2025-07-01 05:45:40.896 btags += '^' * lb
2025-07-01 05:45:40.909 elif tag == 'delete':
2025-07-01 05:45:40.921 atags += '-' * la
2025-07-01 05:45:40.929 elif tag == 'insert':
2025-07-01 05:45:40.937 btags += '+' * lb
2025-07-01 05:45:40.947 elif tag == 'equal':
2025-07-01 05:45:40.953 atags += ' ' * la
2025-07-01 05:45:40.958 btags += ' ' * lb
2025-07-01 05:45:40.963 else:
2025-07-01 05:45:40.967 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:40.972 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:40.983 else:
2025-07-01 05:45:40.991 # the synch pair is identical
2025-07-01 05:45:40.998 yield ' ' + aelt
2025-07-01 05:45:41.011
2025-07-01 05:45:41.020 # pump out diffs from after the synch point
2025-07-01 05:45:41.027 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:41.033
2025-07-01 05:45:41.039 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:41.046 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:41.054
2025-07-01 05:45:41.067 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:41.078 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:41.084 alo = 109, ahi = 1101
2025-07-01 05:45:41.092 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:41.098 blo = 109, bhi = 1101
2025-07-01 05:45:41.109
2025-07-01 05:45:41.120 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:41.130 g = []
2025-07-01 05:45:41.139 if alo < ahi:
2025-07-01 05:45:41.150 if blo < bhi:
2025-07-01 05:45:41.160 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:41.170 else:
2025-07-01 05:45:41.179 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:41.185 elif blo < bhi:
2025-07-01 05:45:41.191 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:41.197
2025-07-01 05:45:41.203 > yield from g
2025-07-01 05:45:41.209
2025-07-01 05:45:41.215 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:41.222 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:41.231
2025-07-01 05:45:41.241 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:41.251 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:41.262 alo = 109, ahi = 1101
2025-07-01 05:45:41.273 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:41.280 blo = 109, bhi = 1101
2025-07-01 05:45:41.287
2025-07-01 05:45:41.294 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:41.301 r"""
2025-07-01 05:45:41.308 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:41.315 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:41.323 synch point, and intraline difference marking is done on the
2025-07-01 05:45:41.330 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:41.337
2025-07-01 05:45:41.351 Example:
2025-07-01 05:45:41.358
2025-07-01 05:45:41.369 >>> d = Differ()
2025-07-01 05:45:41.378 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:41.385 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:41.391 >>> print(''.join(results), end="")
2025-07-01 05:45:41.396 - abcDefghiJkl
2025-07-01 05:45:41.406 + abcdefGhijkl
2025-07-01 05:45:41.415 """
2025-07-01 05:45:41.420
2025-07-01 05:45:41.425 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:41.430 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:41.441 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:41.449 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:41.456 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:41.462
2025-07-01 05:45:41.467 # search for the pair that matches best without being identical
2025-07-01 05:45:41.472 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:41.477 # on junk -- unless we have to)
2025-07-01 05:45:41.485 for j in range(blo, bhi):
2025-07-01 05:45:41.493 bj = b[j]
2025-07-01 05:45:41.500 cruncher.set_seq2(bj)
2025-07-01 05:45:41.507 for i in range(alo, ahi):
2025-07-01 05:45:41.514 ai = a[i]
2025-07-01 05:45:41.521 if ai == bj:
2025-07-01 05:45:41.528 if eqi is None:
2025-07-01 05:45:41.535 eqi, eqj = i, j
2025-07-01 05:45:41.544 continue
2025-07-01 05:45:41.555 cruncher.set_seq1(ai)
2025-07-01 05:45:41.568 # computing similarity is expensive, so use the quick
2025-07-01 05:45:41.575 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:41.581 # compares by a factor of 3.
2025-07-01 05:45:41.587 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:41.595 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:41.602 # of the computation is cached by cruncher
2025-07-01 05:45:41.610 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:41.617 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:41.624 cruncher.ratio() > best_ratio:
2025-07-01 05:45:41.630 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:41.637 if best_ratio < cutoff:
2025-07-01 05:45:41.643 # no non-identical "pretty close" pair
2025-07-01 05:45:41.650 if eqi is None:
2025-07-01 05:45:41.660 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:41.669 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:41.678 return
2025-07-01 05:45:41.685 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:41.691 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:41.696 else:
2025-07-01 05:45:41.702 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:41.706 eqi = None
2025-07-01 05:45:41.710
2025-07-01 05:45:41.716 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:41.721 # identical
2025-07-01 05:45:41.726
2025-07-01 05:45:41.731 # pump out diffs from before the synch point
2025-07-01 05:45:41.737 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:41.743
2025-07-01 05:45:41.748 # do intraline marking on the synch pair
2025-07-01 05:45:41.754 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:41.759 if eqi is None:
2025-07-01 05:45:41.763 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:41.768 atags = btags = ""
2025-07-01 05:45:41.773 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:41.778 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:41.783 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:41.789 if tag == 'replace':
2025-07-01 05:45:41.795 atags += '^' * la
2025-07-01 05:45:41.801 btags += '^' * lb
2025-07-01 05:45:41.807 elif tag == 'delete':
2025-07-01 05:45:41.814 atags += '-' * la
2025-07-01 05:45:41.821 elif tag == 'insert':
2025-07-01 05:45:41.827 btags += '+' * lb
2025-07-01 05:45:41.834 elif tag == 'equal':
2025-07-01 05:45:41.841 atags += ' ' * la
2025-07-01 05:45:41.847 btags += ' ' * lb
2025-07-01 05:45:41.854 else:
2025-07-01 05:45:41.865 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:41.875 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:41.881 else:
2025-07-01 05:45:41.887 # the synch pair is identical
2025-07-01 05:45:41.892 yield ' ' + aelt
2025-07-01 05:45:41.897
2025-07-01 05:45:41.908 # pump out diffs from after the synch point
2025-07-01 05:45:41.918 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:41.927
2025-07-01 05:45:41.935 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:41.943 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:41.949
2025-07-01 05:45:41.960 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:41.970 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:41.978 alo = 110, ahi = 1101
2025-07-01 05:45:41.990 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:42.000 blo = 110, bhi = 1101
2025-07-01 05:45:42.012
2025-07-01 05:45:42.022 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:42.028 g = []
2025-07-01 05:45:42.034 if alo < ahi:
2025-07-01 05:45:42.043 if blo < bhi:
2025-07-01 05:45:42.053 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:42.060 else:
2025-07-01 05:45:42.067 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:42.073 elif blo < bhi:
2025-07-01 05:45:42.079 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:42.084
2025-07-01 05:45:42.092 > yield from g
2025-07-01 05:45:42.100
2025-07-01 05:45:42.108 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:42.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:42.126
2025-07-01 05:45:42.136 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:42.148 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:42.156 alo = 110, ahi = 1101
2025-07-01 05:45:42.166 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:42.177 blo = 110, bhi = 1101
2025-07-01 05:45:42.190
2025-07-01 05:45:42.199 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:42.206 r"""
2025-07-01 05:45:42.213 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:42.219 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:42.225 synch point, and intraline difference marking is done on the
2025-07-01 05:45:42.237 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:42.246
2025-07-01 05:45:42.253 Example:
2025-07-01 05:45:42.260
2025-07-01 05:45:42.272 >>> d = Differ()
2025-07-01 05:45:42.284 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:42.293 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:42.304 >>> print(''.join(results), end="")
2025-07-01 05:45:42.315 - abcDefghiJkl
2025-07-01 05:45:42.334 + abcdefGhijkl
2025-07-01 05:45:42.354 """
2025-07-01 05:45:42.362
2025-07-01 05:45:42.372 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:42.379 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:42.385 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:42.391 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:42.398 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:42.414
2025-07-01 05:45:42.424 # search for the pair that matches best without being identical
2025-07-01 05:45:42.432 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:42.438 # on junk -- unless we have to)
2025-07-01 05:45:42.444 for j in range(blo, bhi):
2025-07-01 05:45:42.449 bj = b[j]
2025-07-01 05:45:42.454 cruncher.set_seq2(bj)
2025-07-01 05:45:42.459 for i in range(alo, ahi):
2025-07-01 05:45:42.464 ai = a[i]
2025-07-01 05:45:42.470 if ai == bj:
2025-07-01 05:45:42.476 if eqi is None:
2025-07-01 05:45:42.484 eqi, eqj = i, j
2025-07-01 05:45:42.495 continue
2025-07-01 05:45:42.504 cruncher.set_seq1(ai)
2025-07-01 05:45:42.512 # computing similarity is expensive, so use the quick
2025-07-01 05:45:42.518 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:42.524 # compares by a factor of 3.
2025-07-01 05:45:42.530 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:42.535 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:42.541 # of the computation is cached by cruncher
2025-07-01 05:45:42.547 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:42.553 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:42.561 cruncher.ratio() > best_ratio:
2025-07-01 05:45:42.569 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:42.576 if best_ratio < cutoff:
2025-07-01 05:45:42.588 # no non-identical "pretty close" pair
2025-07-01 05:45:42.599 if eqi is None:
2025-07-01 05:45:42.608 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:42.616 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:42.624 return
2025-07-01 05:45:42.631 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:42.639 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:42.645 else:
2025-07-01 05:45:42.651 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:42.658 eqi = None
2025-07-01 05:45:42.665
2025-07-01 05:45:42.672 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:42.680 # identical
2025-07-01 05:45:42.687
2025-07-01 05:45:42.693 # pump out diffs from before the synch point
2025-07-01 05:45:42.699 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:42.706
2025-07-01 05:45:42.718 # do intraline marking on the synch pair
2025-07-01 05:45:42.727 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:42.734 if eqi is None:
2025-07-01 05:45:42.743 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:42.754 atags = btags = ""
2025-07-01 05:45:42.766 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:42.774 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:42.780 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:42.787 if tag == 'replace':
2025-07-01 05:45:42.796 atags += '^' * la
2025-07-01 05:45:42.804 btags += '^' * lb
2025-07-01 05:45:42.811 elif tag == 'delete':
2025-07-01 05:45:42.817 atags += '-' * la
2025-07-01 05:45:42.821 elif tag == 'insert':
2025-07-01 05:45:42.832 btags += '+' * lb
2025-07-01 05:45:42.844 elif tag == 'equal':
2025-07-01 05:45:42.856 atags += ' ' * la
2025-07-01 05:45:42.866 btags += ' ' * lb
2025-07-01 05:45:42.875 else:
2025-07-01 05:45:42.887 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:42.899 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:42.908 else:
2025-07-01 05:45:42.916 # the synch pair is identical
2025-07-01 05:45:42.923 yield ' ' + aelt
2025-07-01 05:45:42.930
2025-07-01 05:45:42.936 # pump out diffs from after the synch point
2025-07-01 05:45:42.942 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:42.947
2025-07-01 05:45:42.955 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:42.966 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:42.977
2025-07-01 05:45:42.988 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:42.998 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:43.008 alo = 111, ahi = 1101
2025-07-01 05:45:43.021 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:43.031 blo = 111, bhi = 1101
2025-07-01 05:45:43.038
2025-07-01 05:45:43.050 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:43.060 g = []
2025-07-01 05:45:43.068 if alo < ahi:
2025-07-01 05:45:43.075 if blo < bhi:
2025-07-01 05:45:43.083 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:43.088 else:
2025-07-01 05:45:43.097 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:43.108 elif blo < bhi:
2025-07-01 05:45:43.119 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:43.128
2025-07-01 05:45:43.137 > yield from g
2025-07-01 05:45:43.149
2025-07-01 05:45:43.161 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:43.174 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:43.187
2025-07-01 05:45:43.199 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:43.211 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:43.223 alo = 111, ahi = 1101
2025-07-01 05:45:43.237 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:43.248 blo = 111, bhi = 1101
2025-07-01 05:45:43.256
2025-07-01 05:45:43.263 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:43.271 r"""
2025-07-01 05:45:43.282 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:43.291 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:43.299 synch point, and intraline difference marking is done on the
2025-07-01 05:45:43.307 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:43.318
2025-07-01 05:45:43.326 Example:
2025-07-01 05:45:43.333
2025-07-01 05:45:43.340 >>> d = Differ()
2025-07-01 05:45:43.346 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:43.356 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:43.367 >>> print(''.join(results), end="")
2025-07-01 05:45:43.375 - abcDefghiJkl
2025-07-01 05:45:43.395 + abcdefGhijkl
2025-07-01 05:45:43.409 """
2025-07-01 05:45:43.415
2025-07-01 05:45:43.422 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:43.435 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:43.444 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:43.453 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:43.461 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:43.468
2025-07-01 05:45:43.475 # search for the pair that matches best without being identical
2025-07-01 05:45:43.482 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:43.488 # on junk -- unless we have to)
2025-07-01 05:45:43.494 for j in range(blo, bhi):
2025-07-01 05:45:43.501 bj = b[j]
2025-07-01 05:45:43.506 cruncher.set_seq2(bj)
2025-07-01 05:45:43.516 for i in range(alo, ahi):
2025-07-01 05:45:43.527 ai = a[i]
2025-07-01 05:45:43.539 if ai == bj:
2025-07-01 05:45:43.548 if eqi is None:
2025-07-01 05:45:43.555 eqi, eqj = i, j
2025-07-01 05:45:43.563 continue
2025-07-01 05:45:43.574 cruncher.set_seq1(ai)
2025-07-01 05:45:43.584 # computing similarity is expensive, so use the quick
2025-07-01 05:45:43.592 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:43.604 # compares by a factor of 3.
2025-07-01 05:45:43.615 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:43.624 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:43.631 # of the computation is cached by cruncher
2025-07-01 05:45:43.639 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:43.647 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:43.656 cruncher.ratio() > best_ratio:
2025-07-01 05:45:43.665 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:43.675 if best_ratio < cutoff:
2025-07-01 05:45:43.687 # no non-identical "pretty close" pair
2025-07-01 05:45:43.697 if eqi is None:
2025-07-01 05:45:43.707 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:43.719 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:43.729 return
2025-07-01 05:45:43.737 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:43.744 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:43.750 else:
2025-07-01 05:45:43.760 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:43.770 eqi = None
2025-07-01 05:45:43.778
2025-07-01 05:45:43.786 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:43.795 # identical
2025-07-01 05:45:43.806
2025-07-01 05:45:43.815 # pump out diffs from before the synch point
2025-07-01 05:45:43.824 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:43.831
2025-07-01 05:45:43.837 # do intraline marking on the synch pair
2025-07-01 05:45:43.843 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:43.849 if eqi is None:
2025-07-01 05:45:43.854 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:43.860 atags = btags = ""
2025-07-01 05:45:43.866 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:43.872 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:43.877 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:43.883 if tag == 'replace':
2025-07-01 05:45:43.889 atags += '^' * la
2025-07-01 05:45:43.894 btags += '^' * lb
2025-07-01 05:45:43.900 elif tag == 'delete':
2025-07-01 05:45:43.909 atags += '-' * la
2025-07-01 05:45:43.921 elif tag == 'insert':
2025-07-01 05:45:43.930 btags += '+' * lb
2025-07-01 05:45:43.938 elif tag == 'equal':
2025-07-01 05:45:43.944 atags += ' ' * la
2025-07-01 05:45:43.951 btags += ' ' * lb
2025-07-01 05:45:43.956 else:
2025-07-01 05:45:43.962 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:43.968 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:43.974 else:
2025-07-01 05:45:43.979 # the synch pair is identical
2025-07-01 05:45:43.985 yield ' ' + aelt
2025-07-01 05:45:43.991
2025-07-01 05:45:43.997 # pump out diffs from after the synch point
2025-07-01 05:45:44.003 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:44.015
2025-07-01 05:45:44.024 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:44.034 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:44.045
2025-07-01 05:45:44.054 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:44.068 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:44.079 alo = 114, ahi = 1101
2025-07-01 05:45:44.088 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:44.093 blo = 114, bhi = 1101
2025-07-01 05:45:44.102
2025-07-01 05:45:44.109 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:44.115 g = []
2025-07-01 05:45:44.122 if alo < ahi:
2025-07-01 05:45:44.128 if blo < bhi:
2025-07-01 05:45:44.135 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:44.143 else:
2025-07-01 05:45:44.155 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:44.163 elif blo < bhi:
2025-07-01 05:45:44.174 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:44.182
2025-07-01 05:45:44.190 > yield from g
2025-07-01 05:45:44.203
2025-07-01 05:45:44.212 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:44.220 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:44.227
2025-07-01 05:45:44.238 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:44.248 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:44.257 alo = 114, ahi = 1101
2025-07-01 05:45:44.269 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:44.278 blo = 114, bhi = 1101
2025-07-01 05:45:44.288
2025-07-01 05:45:44.299 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:44.307 r"""
2025-07-01 05:45:44.315 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:44.323 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:44.334 synch point, and intraline difference marking is done on the
2025-07-01 05:45:44.343 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:44.350
2025-07-01 05:45:44.362 Example:
2025-07-01 05:45:44.371
2025-07-01 05:45:44.379 >>> d = Differ()
2025-07-01 05:45:44.387 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:44.399 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:44.411 >>> print(''.join(results), end="")
2025-07-01 05:45:44.420 - abcDefghiJkl
2025-07-01 05:45:44.435 + abcdefGhijkl
2025-07-01 05:45:44.448 """
2025-07-01 05:45:44.455
2025-07-01 05:45:44.462 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:44.472 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:44.483 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:44.490 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:44.501 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:44.513
2025-07-01 05:45:44.525 # search for the pair that matches best without being identical
2025-07-01 05:45:44.535 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:44.543 # on junk -- unless we have to)
2025-07-01 05:45:44.556 for j in range(blo, bhi):
2025-07-01 05:45:44.566 bj = b[j]
2025-07-01 05:45:44.575 cruncher.set_seq2(bj)
2025-07-01 05:45:44.582 for i in range(alo, ahi):
2025-07-01 05:45:44.594 ai = a[i]
2025-07-01 05:45:44.604 if ai == bj:
2025-07-01 05:45:44.613 if eqi is None:
2025-07-01 05:45:44.621 eqi, eqj = i, j
2025-07-01 05:45:44.627 continue
2025-07-01 05:45:44.634 cruncher.set_seq1(ai)
2025-07-01 05:45:44.643 # computing similarity is expensive, so use the quick
2025-07-01 05:45:44.652 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:44.663 # compares by a factor of 3.
2025-07-01 05:45:44.672 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:44.679 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:44.685 # of the computation is cached by cruncher
2025-07-01 05:45:44.690 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:44.695 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:44.701 cruncher.ratio() > best_ratio:
2025-07-01 05:45:44.706 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:44.712 if best_ratio < cutoff:
2025-07-01 05:45:44.718 # no non-identical "pretty close" pair
2025-07-01 05:45:44.724 if eqi is None:
2025-07-01 05:45:44.730 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:44.739 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:44.746 return
2025-07-01 05:45:44.752 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:44.759 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:44.766 else:
2025-07-01 05:45:44.777 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:44.788 eqi = None
2025-07-01 05:45:44.797
2025-07-01 05:45:44.806 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:44.812 # identical
2025-07-01 05:45:44.817
2025-07-01 05:45:44.823 # pump out diffs from before the synch point
2025-07-01 05:45:44.829 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:44.835
2025-07-01 05:45:44.841 # do intraline marking on the synch pair
2025-07-01 05:45:44.849 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:44.856 if eqi is None:
2025-07-01 05:45:44.863 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:44.868 atags = btags = ""
2025-07-01 05:45:44.873 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:44.878 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:44.883 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:44.888 if tag == 'replace':
2025-07-01 05:45:44.892 atags += '^' * la
2025-07-01 05:45:44.897 btags += '^' * lb
2025-07-01 05:45:44.901 elif tag == 'delete':
2025-07-01 05:45:44.907 atags += '-' * la
2025-07-01 05:45:44.913 elif tag == 'insert':
2025-07-01 05:45:44.924 btags += '+' * lb
2025-07-01 05:45:44.935 elif tag == 'equal':
2025-07-01 05:45:44.944 atags += ' ' * la
2025-07-01 05:45:44.951 btags += ' ' * lb
2025-07-01 05:45:44.957 else:
2025-07-01 05:45:44.961 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:44.966 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:44.970 else:
2025-07-01 05:45:44.975 # the synch pair is identical
2025-07-01 05:45:44.979 yield ' ' + aelt
2025-07-01 05:45:44.984
2025-07-01 05:45:44.988 # pump out diffs from after the synch point
2025-07-01 05:45:44.993 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:44.997
2025-07-01 05:45:45.004 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:45.011 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:45.021
2025-07-01 05:45:45.033 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:45.045 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:45.054 alo = 115, ahi = 1101
2025-07-01 05:45:45.062 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:45.068 blo = 115, bhi = 1101
2025-07-01 05:45:45.074
2025-07-01 05:45:45.080 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:45.086 g = []
2025-07-01 05:45:45.092 if alo < ahi:
2025-07-01 05:45:45.099 if blo < bhi:
2025-07-01 05:45:45.110 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:45.118 else:
2025-07-01 05:45:45.125 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:45.135 elif blo < bhi:
2025-07-01 05:45:45.147 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:45.155
2025-07-01 05:45:45.162 > yield from g
2025-07-01 05:45:45.168
2025-07-01 05:45:45.174 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:45.187 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:45.193
2025-07-01 05:45:45.199 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:45.213 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:45.224 alo = 115, ahi = 1101
2025-07-01 05:45:45.234 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:45.246 blo = 115, bhi = 1101
2025-07-01 05:45:45.255
2025-07-01 05:45:45.262 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:45.271 r"""
2025-07-01 05:45:45.282 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:45.293 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:45.303 synch point, and intraline difference marking is done on the
2025-07-01 05:45:45.310 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:45.316
2025-07-01 05:45:45.322 Example:
2025-07-01 05:45:45.326
2025-07-01 05:45:45.331 >>> d = Differ()
2025-07-01 05:45:45.336 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:45.341 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:45.346 >>> print(''.join(results), end="")
2025-07-01 05:45:45.352 - abcDefghiJkl
2025-07-01 05:45:45.363 + abcdefGhijkl
2025-07-01 05:45:45.379 """
2025-07-01 05:45:45.390
2025-07-01 05:45:45.402 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:45.414 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:45.427 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:45.437 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:45.448 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:45.457
2025-07-01 05:45:45.465 # search for the pair that matches best without being identical
2025-07-01 05:45:45.472 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:45.478 # on junk -- unless we have to)
2025-07-01 05:45:45.491 for j in range(blo, bhi):
2025-07-01 05:45:45.502 bj = b[j]
2025-07-01 05:45:45.510 cruncher.set_seq2(bj)
2025-07-01 05:45:45.517 for i in range(alo, ahi):
2025-07-01 05:45:45.524 ai = a[i]
2025-07-01 05:45:45.530 if ai == bj:
2025-07-01 05:45:45.540 if eqi is None:
2025-07-01 05:45:45.551 eqi, eqj = i, j
2025-07-01 05:45:45.560 continue
2025-07-01 05:45:45.568 cruncher.set_seq1(ai)
2025-07-01 05:45:45.576 # computing similarity is expensive, so use the quick
2025-07-01 05:45:45.586 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:45.597 # compares by a factor of 3.
2025-07-01 05:45:45.610 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:45.621 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:45.632 # of the computation is cached by cruncher
2025-07-01 05:45:45.640 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:45.651 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:45.660 cruncher.ratio() > best_ratio:
2025-07-01 05:45:45.668 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:45.673 if best_ratio < cutoff:
2025-07-01 05:45:45.679 # no non-identical "pretty close" pair
2025-07-01 05:45:45.685 if eqi is None:
2025-07-01 05:45:45.691 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:45.698 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:45.709 return
2025-07-01 05:45:45.719 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:45.728 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:45.735 else:
2025-07-01 05:45:45.741 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:45.751 eqi = None
2025-07-01 05:45:45.761
2025-07-01 05:45:45.768 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:45.778 # identical
2025-07-01 05:45:45.788
2025-07-01 05:45:45.797 # pump out diffs from before the synch point
2025-07-01 05:45:45.808 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:45.818
2025-07-01 05:45:45.827 # do intraline marking on the synch pair
2025-07-01 05:45:45.837 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:45.848 if eqi is None:
2025-07-01 05:45:45.858 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:45.870 atags = btags = ""
2025-07-01 05:45:45.880 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:45.888 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:45.896 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:45.902 if tag == 'replace':
2025-07-01 05:45:45.908 atags += '^' * la
2025-07-01 05:45:45.914 btags += '^' * lb
2025-07-01 05:45:45.920 elif tag == 'delete':
2025-07-01 05:45:45.926 atags += '-' * la
2025-07-01 05:45:45.932 elif tag == 'insert':
2025-07-01 05:45:45.938 btags += '+' * lb
2025-07-01 05:45:45.944 elif tag == 'equal':
2025-07-01 05:45:45.950 atags += ' ' * la
2025-07-01 05:45:45.956 btags += ' ' * lb
2025-07-01 05:45:45.969 else:
2025-07-01 05:45:45.982 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:45.995 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:46.007 else:
2025-07-01 05:45:46.021 # the synch pair is identical
2025-07-01 05:45:46.032 yield ' ' + aelt
2025-07-01 05:45:46.042
2025-07-01 05:45:46.054 # pump out diffs from after the synch point
2025-07-01 05:45:46.063 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:46.075
2025-07-01 05:45:46.083 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:46.090 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:46.095
2025-07-01 05:45:46.100 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:46.105 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:46.109 alo = 116, ahi = 1101
2025-07-01 05:45:46.114 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:46.124 blo = 116, bhi = 1101
2025-07-01 05:45:46.133
2025-07-01 05:45:46.141 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:46.147 g = []
2025-07-01 05:45:46.154 if alo < ahi:
2025-07-01 05:45:46.160 if blo < bhi:
2025-07-01 05:45:46.166 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:46.172 else:
2025-07-01 05:45:46.178 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:46.189 elif blo < bhi:
2025-07-01 05:45:46.200 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:46.208
2025-07-01 05:45:46.214 > yield from g
2025-07-01 05:45:46.220
2025-07-01 05:45:46.226 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:46.232 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:46.238
2025-07-01 05:45:46.243 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:46.248 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:46.253 alo = 116, ahi = 1101
2025-07-01 05:45:46.258 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:46.264 blo = 116, bhi = 1101
2025-07-01 05:45:46.274
2025-07-01 05:45:46.283 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:46.291 r"""
2025-07-01 05:45:46.297 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:46.303 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:46.309 synch point, and intraline difference marking is done on the
2025-07-01 05:45:46.315 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:46.321
2025-07-01 05:45:46.328 Example:
2025-07-01 05:45:46.334
2025-07-01 05:45:46.341 >>> d = Differ()
2025-07-01 05:45:46.347 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:46.354 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:46.361 >>> print(''.join(results), end="")
2025-07-01 05:45:46.368 - abcDefghiJkl
2025-07-01 05:45:46.384 + abcdefGhijkl
2025-07-01 05:45:46.404 """
2025-07-01 05:45:46.411
2025-07-01 05:45:46.418 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:46.423 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:46.429 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:46.435 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:46.442 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:46.449
2025-07-01 05:45:46.456 # search for the pair that matches best without being identical
2025-07-01 05:45:46.462 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:46.467 # on junk -- unless we have to)
2025-07-01 05:45:46.472 for j in range(blo, bhi):
2025-07-01 05:45:46.477 bj = b[j]
2025-07-01 05:45:46.482 cruncher.set_seq2(bj)
2025-07-01 05:45:46.486 for i in range(alo, ahi):
2025-07-01 05:45:46.491 ai = a[i]
2025-07-01 05:45:46.496 if ai == bj:
2025-07-01 05:45:46.502 if eqi is None:
2025-07-01 05:45:46.511 eqi, eqj = i, j
2025-07-01 05:45:46.521 continue
2025-07-01 05:45:46.528 cruncher.set_seq1(ai)
2025-07-01 05:45:46.534 # computing similarity is expensive, so use the quick
2025-07-01 05:45:46.540 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:46.546 # compares by a factor of 3.
2025-07-01 05:45:46.551 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:46.558 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:46.565 # of the computation is cached by cruncher
2025-07-01 05:45:46.573 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:46.579 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:46.585 cruncher.ratio() > best_ratio:
2025-07-01 05:45:46.591 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:46.597 if best_ratio < cutoff:
2025-07-01 05:45:46.603 # no non-identical "pretty close" pair
2025-07-01 05:45:46.610 if eqi is None:
2025-07-01 05:45:46.618 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:46.627 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:46.635 return
2025-07-01 05:45:46.642 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:46.649 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:46.655 else:
2025-07-01 05:45:46.661 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:46.666 eqi = None
2025-07-01 05:45:46.672
2025-07-01 05:45:46.678 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:46.686 # identical
2025-07-01 05:45:46.693
2025-07-01 05:45:46.698 # pump out diffs from before the synch point
2025-07-01 05:45:46.704 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:46.709
2025-07-01 05:45:46.715 # do intraline marking on the synch pair
2025-07-01 05:45:46.721 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:46.727 if eqi is None:
2025-07-01 05:45:46.732 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:46.738 atags = btags = ""
2025-07-01 05:45:46.747 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:46.757 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:46.765 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:46.774 if tag == 'replace':
2025-07-01 05:45:46.785 atags += '^' * la
2025-07-01 05:45:46.795 btags += '^' * lb
2025-07-01 05:45:46.806 elif tag == 'delete':
2025-07-01 05:45:46.819 atags += '-' * la
2025-07-01 05:45:46.830 elif tag == 'insert':
2025-07-01 05:45:46.838 btags += '+' * lb
2025-07-01 05:45:46.848 elif tag == 'equal':
2025-07-01 05:45:46.858 atags += ' ' * la
2025-07-01 05:45:46.867 btags += ' ' * lb
2025-07-01 05:45:46.875 else:
2025-07-01 05:45:46.886 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:46.895 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:46.903 else:
2025-07-01 05:45:46.914 # the synch pair is identical
2025-07-01 05:45:46.923 yield ' ' + aelt
2025-07-01 05:45:46.932
2025-07-01 05:45:46.939 # pump out diffs from after the synch point
2025-07-01 05:45:46.947 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:46.955
2025-07-01 05:45:46.965 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:46.975 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:46.986
2025-07-01 05:45:46.995 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:47.005 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:47.012 alo = 117, ahi = 1101
2025-07-01 05:45:47.021 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:47.028 blo = 117, bhi = 1101
2025-07-01 05:45:47.035
2025-07-01 05:45:47.045 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:47.053 g = []
2025-07-01 05:45:47.063 if alo < ahi:
2025-07-01 05:45:47.070 if blo < bhi:
2025-07-01 05:45:47.082 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:47.092 else:
2025-07-01 05:45:47.101 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:47.109 elif blo < bhi:
2025-07-01 05:45:47.114 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:47.119
2025-07-01 05:45:47.124 > yield from g
2025-07-01 05:45:47.129
2025-07-01 05:45:47.135 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:47.142 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:47.149
2025-07-01 05:45:47.160 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:47.169 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:47.175 alo = 117, ahi = 1101
2025-07-01 05:45:47.182 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:47.191 blo = 117, bhi = 1101
2025-07-01 05:45:47.197
2025-07-01 05:45:47.203 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:47.210 r"""
2025-07-01 05:45:47.221 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:47.229 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:47.234 synch point, and intraline difference marking is done on the
2025-07-01 05:45:47.240 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:47.246
2025-07-01 05:45:47.251 Example:
2025-07-01 05:45:47.257
2025-07-01 05:45:47.263 >>> d = Differ()
2025-07-01 05:45:47.270 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:47.281 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:47.290 >>> print(''.join(results), end="")
2025-07-01 05:45:47.301 - abcDefghiJkl
2025-07-01 05:45:47.315 + abcdefGhijkl
2025-07-01 05:45:47.327 """
2025-07-01 05:45:47.333
2025-07-01 05:45:47.340 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:47.347 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:47.354 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:47.360 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:47.367 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:47.375
2025-07-01 05:45:47.388 # search for the pair that matches best without being identical
2025-07-01 05:45:47.398 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:47.405 # on junk -- unless we have to)
2025-07-01 05:45:47.412 for j in range(blo, bhi):
2025-07-01 05:45:47.418 bj = b[j]
2025-07-01 05:45:47.423 cruncher.set_seq2(bj)
2025-07-01 05:45:47.435 for i in range(alo, ahi):
2025-07-01 05:45:47.445 ai = a[i]
2025-07-01 05:45:47.453 if ai == bj:
2025-07-01 05:45:47.460 if eqi is None:
2025-07-01 05:45:47.466 eqi, eqj = i, j
2025-07-01 05:45:47.472 continue
2025-07-01 05:45:47.479 cruncher.set_seq1(ai)
2025-07-01 05:45:47.485 # computing similarity is expensive, so use the quick
2025-07-01 05:45:47.491 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:47.498 # compares by a factor of 3.
2025-07-01 05:45:47.504 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:47.511 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:47.519 # of the computation is cached by cruncher
2025-07-01 05:45:47.525 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:47.539 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:47.549 cruncher.ratio() > best_ratio:
2025-07-01 05:45:47.557 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:47.564 if best_ratio < cutoff:
2025-07-01 05:45:47.570 # no non-identical "pretty close" pair
2025-07-01 05:45:47.576 if eqi is None:
2025-07-01 05:45:47.582 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:47.590 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:47.597 return
2025-07-01 05:45:47.603 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:47.608 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:47.614 else:
2025-07-01 05:45:47.619 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:47.625 eqi = None
2025-07-01 05:45:47.631
2025-07-01 05:45:47.638 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:47.648 # identical
2025-07-01 05:45:47.657
2025-07-01 05:45:47.666 # pump out diffs from before the synch point
2025-07-01 05:45:47.673 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:47.679
2025-07-01 05:45:47.687 # do intraline marking on the synch pair
2025-07-01 05:45:47.699 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:47.708 if eqi is None:
2025-07-01 05:45:47.720 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:47.727 atags = btags = ""
2025-07-01 05:45:47.736 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:47.746 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:47.754 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:47.760 if tag == 'replace':
2025-07-01 05:45:47.767 atags += '^' * la
2025-07-01 05:45:47.773 btags += '^' * lb
2025-07-01 05:45:47.778 elif tag == 'delete':
2025-07-01 05:45:47.784 atags += '-' * la
2025-07-01 05:45:47.790 elif tag == 'insert':
2025-07-01 05:45:47.799 btags += '+' * lb
2025-07-01 05:45:47.805 elif tag == 'equal':
2025-07-01 05:45:47.814 atags += ' ' * la
2025-07-01 05:45:47.826 btags += ' ' * lb
2025-07-01 05:45:47.834 else:
2025-07-01 05:45:47.845 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:47.855 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:47.863 else:
2025-07-01 05:45:47.871 # the synch pair is identical
2025-07-01 05:45:47.880 yield ' ' + aelt
2025-07-01 05:45:47.888
2025-07-01 05:45:47.894 # pump out diffs from after the synch point
2025-07-01 05:45:47.902 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:47.913
2025-07-01 05:45:47.925 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:47.935 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:47.944
2025-07-01 05:45:47.952 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:47.960 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:47.969 alo = 118, ahi = 1101
2025-07-01 05:45:47.982 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:47.994 blo = 118, bhi = 1101
2025-07-01 05:45:48.005
2025-07-01 05:45:48.016 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:48.024 g = []
2025-07-01 05:45:48.032 if alo < ahi:
2025-07-01 05:45:48.039 if blo < bhi:
2025-07-01 05:45:48.045 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:48.051 else:
2025-07-01 05:45:48.057 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:48.062 elif blo < bhi:
2025-07-01 05:45:48.068 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:48.074
2025-07-01 05:45:48.085 > yield from g
2025-07-01 05:45:48.095
2025-07-01 05:45:48.104 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:48.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:48.125
2025-07-01 05:45:48.136 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:48.147 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:48.154 alo = 118, ahi = 1101
2025-07-01 05:45:48.164 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:48.170 blo = 118, bhi = 1101
2025-07-01 05:45:48.183
2025-07-01 05:45:48.193 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:48.202 r"""
2025-07-01 05:45:48.208 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:48.215 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:48.222 synch point, and intraline difference marking is done on the
2025-07-01 05:45:48.228 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:48.233
2025-07-01 05:45:48.238 Example:
2025-07-01 05:45:48.242
2025-07-01 05:45:48.251 >>> d = Differ()
2025-07-01 05:45:48.258 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:48.264 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:48.270 >>> print(''.join(results), end="")
2025-07-01 05:45:48.280 - abcDefghiJkl
2025-07-01 05:45:48.296 + abcdefGhijkl
2025-07-01 05:45:48.307 """
2025-07-01 05:45:48.313
2025-07-01 05:45:48.318 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:48.324 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:48.330 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:48.336 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:48.343 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:48.351
2025-07-01 05:45:48.359 # search for the pair that matches best without being identical
2025-07-01 05:45:48.366 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:48.371 # on junk -- unless we have to)
2025-07-01 05:45:48.379 for j in range(blo, bhi):
2025-07-01 05:45:48.388 bj = b[j]
2025-07-01 05:45:48.397 cruncher.set_seq2(bj)
2025-07-01 05:45:48.403 for i in range(alo, ahi):
2025-07-01 05:45:48.411 ai = a[i]
2025-07-01 05:45:48.422 if ai == bj:
2025-07-01 05:45:48.430 if eqi is None:
2025-07-01 05:45:48.437 eqi, eqj = i, j
2025-07-01 05:45:48.443 continue
2025-07-01 05:45:48.452 cruncher.set_seq1(ai)
2025-07-01 05:45:48.463 # computing similarity is expensive, so use the quick
2025-07-01 05:45:48.472 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:48.480 # compares by a factor of 3.
2025-07-01 05:45:48.487 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:48.495 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:48.503 # of the computation is cached by cruncher
2025-07-01 05:45:48.513 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:48.523 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:48.532 cruncher.ratio() > best_ratio:
2025-07-01 05:45:48.544 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:48.555 if best_ratio < cutoff:
2025-07-01 05:45:48.567 # no non-identical "pretty close" pair
2025-07-01 05:45:48.576 if eqi is None:
2025-07-01 05:45:48.582 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:48.589 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:48.594 return
2025-07-01 05:45:48.598 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:48.603 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:48.611 else:
2025-07-01 05:45:48.620 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:48.629 eqi = None
2025-07-01 05:45:48.636
2025-07-01 05:45:48.642 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:48.649 # identical
2025-07-01 05:45:48.654
2025-07-01 05:45:48.660 # pump out diffs from before the synch point
2025-07-01 05:45:48.667 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:48.678
2025-07-01 05:45:48.687 # do intraline marking on the synch pair
2025-07-01 05:45:48.694 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:48.702 if eqi is None:
2025-07-01 05:45:48.711 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:48.721 atags = btags = ""
2025-07-01 05:45:48.732 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:48.742 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:48.753 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:48.766 if tag == 'replace':
2025-07-01 05:45:48.775 atags += '^' * la
2025-07-01 05:45:48.783 btags += '^' * lb
2025-07-01 05:45:48.790 elif tag == 'delete':
2025-07-01 05:45:48.801 atags += '-' * la
2025-07-01 05:45:48.809 elif tag == 'insert':
2025-07-01 05:45:48.816 btags += '+' * lb
2025-07-01 05:45:48.822 elif tag == 'equal':
2025-07-01 05:45:48.828 atags += ' ' * la
2025-07-01 05:45:48.838 btags += ' ' * lb
2025-07-01 05:45:48.846 else:
2025-07-01 05:45:48.853 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:48.859 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:48.868 else:
2025-07-01 05:45:48.877 # the synch pair is identical
2025-07-01 05:45:48.884 yield ' ' + aelt
2025-07-01 05:45:48.890
2025-07-01 05:45:48.900 # pump out diffs from after the synch point
2025-07-01 05:45:48.912 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:48.920
2025-07-01 05:45:48.927 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:48.936 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:48.948
2025-07-01 05:45:48.960 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:48.970 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:48.978 alo = 119, ahi = 1101
2025-07-01 05:45:48.985 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:48.997 blo = 119, bhi = 1101
2025-07-01 05:45:49.006
2025-07-01 05:45:49.014 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:49.022 g = []
2025-07-01 05:45:49.031 if alo < ahi:
2025-07-01 05:45:49.040 if blo < bhi:
2025-07-01 05:45:49.053 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:49.061 else:
2025-07-01 05:45:49.069 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:49.076 elif blo < bhi:
2025-07-01 05:45:49.082 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:49.091
2025-07-01 05:45:49.100 > yield from g
2025-07-01 05:45:49.113
2025-07-01 05:45:49.125 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:49.134 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:49.141
2025-07-01 05:45:49.147 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:49.154 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:49.164 alo = 119, ahi = 1101
2025-07-01 05:45:49.173 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:49.179 blo = 119, bhi = 1101
2025-07-01 05:45:49.185
2025-07-01 05:45:49.191 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:49.195 r"""
2025-07-01 05:45:49.201 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:49.205 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:49.210 synch point, and intraline difference marking is done on the
2025-07-01 05:45:49.215 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:49.220
2025-07-01 05:45:49.226 Example:
2025-07-01 05:45:49.231
2025-07-01 05:45:49.237 >>> d = Differ()
2025-07-01 05:45:49.243 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:49.250 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:49.257 >>> print(''.join(results), end="")
2025-07-01 05:45:49.264 - abcDefghiJkl
2025-07-01 05:45:49.277 + abcdefGhijkl
2025-07-01 05:45:49.291 """
2025-07-01 05:45:49.306
2025-07-01 05:45:49.315 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:49.321 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:49.326 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:49.331 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:49.336 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:49.340
2025-07-01 05:45:49.346 # search for the pair that matches best without being identical
2025-07-01 05:45:49.351 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:49.356 # on junk -- unless we have to)
2025-07-01 05:45:49.362 for j in range(blo, bhi):
2025-07-01 05:45:49.369 bj = b[j]
2025-07-01 05:45:49.376 cruncher.set_seq2(bj)
2025-07-01 05:45:49.382 for i in range(alo, ahi):
2025-07-01 05:45:49.387 ai = a[i]
2025-07-01 05:45:49.392 if ai == bj:
2025-07-01 05:45:49.398 if eqi is None:
2025-07-01 05:45:49.407 eqi, eqj = i, j
2025-07-01 05:45:49.414 continue
2025-07-01 05:45:49.421 cruncher.set_seq1(ai)
2025-07-01 05:45:49.428 # computing similarity is expensive, so use the quick
2025-07-01 05:45:49.434 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:49.439 # compares by a factor of 3.
2025-07-01 05:45:49.446 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:49.457 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:49.465 # of the computation is cached by cruncher
2025-07-01 05:45:49.472 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:49.485 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:49.497 cruncher.ratio() > best_ratio:
2025-07-01 05:45:49.510 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:49.520 if best_ratio < cutoff:
2025-07-01 05:45:49.528 # no non-identical "pretty close" pair
2025-07-01 05:45:49.535 if eqi is None:
2025-07-01 05:45:49.541 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:49.546 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:49.556 return
2025-07-01 05:45:49.567 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:49.575 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:49.582 else:
2025-07-01 05:45:49.589 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:49.595 eqi = None
2025-07-01 05:45:49.600
2025-07-01 05:45:49.607 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:49.613 # identical
2025-07-01 05:45:49.621
2025-07-01 05:45:49.627 # pump out diffs from before the synch point
2025-07-01 05:45:49.640 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:49.649
2025-07-01 05:45:49.656 # do intraline marking on the synch pair
2025-07-01 05:45:49.663 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:49.669 if eqi is None:
2025-07-01 05:45:49.676 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:49.681 atags = btags = ""
2025-07-01 05:45:49.688 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:49.694 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:49.699 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:49.706 if tag == 'replace':
2025-07-01 05:45:49.712 atags += '^' * la
2025-07-01 05:45:49.718 btags += '^' * lb
2025-07-01 05:45:49.728 elif tag == 'delete':
2025-07-01 05:45:49.736 atags += '-' * la
2025-07-01 05:45:49.743 elif tag == 'insert':
2025-07-01 05:45:49.750 btags += '+' * lb
2025-07-01 05:45:49.761 elif tag == 'equal':
2025-07-01 05:45:49.769 atags += ' ' * la
2025-07-01 05:45:49.777 btags += ' ' * lb
2025-07-01 05:45:49.783 else:
2025-07-01 05:45:49.790 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:49.795 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:49.803 else:
2025-07-01 05:45:49.815 # the synch pair is identical
2025-07-01 05:45:49.823 yield ' ' + aelt
2025-07-01 05:45:49.830
2025-07-01 05:45:49.839 # pump out diffs from after the synch point
2025-07-01 05:45:49.848 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:49.855
2025-07-01 05:45:49.862 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:49.869 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:49.875
2025-07-01 05:45:49.885 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:49.896 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:49.904 alo = 120, ahi = 1101
2025-07-01 05:45:49.913 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:49.918 blo = 120, bhi = 1101
2025-07-01 05:45:49.924
2025-07-01 05:45:49.931 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:49.942 g = []
2025-07-01 05:45:49.950 if alo < ahi:
2025-07-01 05:45:49.962 if blo < bhi:
2025-07-01 05:45:49.973 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:49.985 else:
2025-07-01 05:45:49.996 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:50.008 elif blo < bhi:
2025-07-01 05:45:50.018 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:50.027
2025-07-01 05:45:50.040 > yield from g
2025-07-01 05:45:50.050
2025-07-01 05:45:50.058 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:50.067 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:50.075
2025-07-01 05:45:50.087 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:50.099 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:50.106 alo = 120, ahi = 1101
2025-07-01 05:45:50.116 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:50.127 blo = 120, bhi = 1101
2025-07-01 05:45:50.136
2025-07-01 05:45:50.143 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:50.150 r"""
2025-07-01 05:45:50.157 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:50.163 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:50.170 synch point, and intraline difference marking is done on the
2025-07-01 05:45:50.177 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:50.182
2025-07-01 05:45:50.187 Example:
2025-07-01 05:45:50.195
2025-07-01 05:45:50.203 >>> d = Differ()
2025-07-01 05:45:50.210 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:50.220 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:50.230 >>> print(''.join(results), end="")
2025-07-01 05:45:50.238 - abcDefghiJkl
2025-07-01 05:45:50.257 + abcdefGhijkl
2025-07-01 05:45:50.273 """
2025-07-01 05:45:50.280
2025-07-01 05:45:50.286 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:50.293 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:50.299 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:50.306 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:50.317 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:50.324
2025-07-01 05:45:50.331 # search for the pair that matches best without being identical
2025-07-01 05:45:50.343 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:50.353 # on junk -- unless we have to)
2025-07-01 05:45:50.362 for j in range(blo, bhi):
2025-07-01 05:45:50.371 bj = b[j]
2025-07-01 05:45:50.379 cruncher.set_seq2(bj)
2025-07-01 05:45:50.387 for i in range(alo, ahi):
2025-07-01 05:45:50.395 ai = a[i]
2025-07-01 05:45:50.405 if ai == bj:
2025-07-01 05:45:50.413 if eqi is None:
2025-07-01 05:45:50.425 eqi, eqj = i, j
2025-07-01 05:45:50.436 continue
2025-07-01 05:45:50.445 cruncher.set_seq1(ai)
2025-07-01 05:45:50.455 # computing similarity is expensive, so use the quick
2025-07-01 05:45:50.462 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:50.471 # compares by a factor of 3.
2025-07-01 05:45:50.482 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:50.491 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:50.496 # of the computation is cached by cruncher
2025-07-01 05:45:50.502 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:50.508 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:50.514 cruncher.ratio() > best_ratio:
2025-07-01 05:45:50.525 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:50.535 if best_ratio < cutoff:
2025-07-01 05:45:50.542 # no non-identical "pretty close" pair
2025-07-01 05:45:50.552 if eqi is None:
2025-07-01 05:45:50.563 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:50.572 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:50.578 return
2025-07-01 05:45:50.591 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:50.599 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:50.607 else:
2025-07-01 05:45:50.614 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:50.620 eqi = None
2025-07-01 05:45:50.626
2025-07-01 05:45:50.632 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:50.638 # identical
2025-07-01 05:45:50.648
2025-07-01 05:45:50.658 # pump out diffs from before the synch point
2025-07-01 05:45:50.666 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:50.673
2025-07-01 05:45:50.679 # do intraline marking on the synch pair
2025-07-01 05:45:50.687 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:50.695 if eqi is None:
2025-07-01 05:45:50.705 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:50.716 atags = btags = ""
2025-07-01 05:45:50.727 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:50.737 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:50.743 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:50.749 if tag == 'replace':
2025-07-01 05:45:50.755 atags += '^' * la
2025-07-01 05:45:50.762 btags += '^' * lb
2025-07-01 05:45:50.773 elif tag == 'delete':
2025-07-01 05:45:50.781 atags += '-' * la
2025-07-01 05:45:50.788 elif tag == 'insert':
2025-07-01 05:45:50.797 btags += '+' * lb
2025-07-01 05:45:50.807 elif tag == 'equal':
2025-07-01 05:45:50.817 atags += ' ' * la
2025-07-01 05:45:50.827 btags += ' ' * lb
2025-07-01 05:45:50.839 else:
2025-07-01 05:45:50.851 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:50.860 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:50.867 else:
2025-07-01 05:45:50.875 # the synch pair is identical
2025-07-01 05:45:50.887 yield ' ' + aelt
2025-07-01 05:45:50.898
2025-07-01 05:45:50.909 # pump out diffs from after the synch point
2025-07-01 05:45:50.920 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:50.927
2025-07-01 05:45:50.935 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:50.946 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:50.957
2025-07-01 05:45:50.965 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:50.974 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:50.984 alo = 121, ahi = 1101
2025-07-01 05:45:50.995 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:51.003 blo = 121, bhi = 1101
2025-07-01 05:45:51.010
2025-07-01 05:45:51.016 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:51.023 g = []
2025-07-01 05:45:51.036 if alo < ahi:
2025-07-01 05:45:51.048 if blo < bhi:
2025-07-01 05:45:51.056 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:51.063 else:
2025-07-01 05:45:51.070 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:51.083 elif blo < bhi:
2025-07-01 05:45:51.096 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:51.104
2025-07-01 05:45:51.111 > yield from g
2025-07-01 05:45:51.118
2025-07-01 05:45:51.123 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:51.128 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:51.134
2025-07-01 05:45:51.139 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:51.146 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:51.156 alo = 121, ahi = 1101
2025-07-01 05:45:51.166 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:51.173 blo = 121, bhi = 1101
2025-07-01 05:45:51.182
2025-07-01 05:45:51.190 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:51.196 r"""
2025-07-01 05:45:51.204 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:51.212 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:51.220 synch point, and intraline difference marking is done on the
2025-07-01 05:45:51.228 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:51.235
2025-07-01 05:45:51.243 Example:
2025-07-01 05:45:51.255
2025-07-01 05:45:51.265 >>> d = Differ()
2025-07-01 05:45:51.273 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:51.280 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:51.287 >>> print(''.join(results), end="")
2025-07-01 05:45:51.293 - abcDefghiJkl
2025-07-01 05:45:51.306 + abcdefGhijkl
2025-07-01 05:45:51.327 """
2025-07-01 05:45:51.335
2025-07-01 05:45:51.343 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:51.358 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:51.368 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:51.375 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:51.382 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:51.389
2025-07-01 05:45:51.396 # search for the pair that matches best without being identical
2025-07-01 05:45:51.402 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:51.411 # on junk -- unless we have to)
2025-07-01 05:45:51.424 for j in range(blo, bhi):
2025-07-01 05:45:51.436 bj = b[j]
2025-07-01 05:45:51.450 cruncher.set_seq2(bj)
2025-07-01 05:45:51.462 for i in range(alo, ahi):
2025-07-01 05:45:51.475 ai = a[i]
2025-07-01 05:45:51.486 if ai == bj:
2025-07-01 05:45:51.499 if eqi is None:
2025-07-01 05:45:51.511 eqi, eqj = i, j
2025-07-01 05:45:51.523 continue
2025-07-01 05:45:51.532 cruncher.set_seq1(ai)
2025-07-01 05:45:51.538 # computing similarity is expensive, so use the quick
2025-07-01 05:45:51.545 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:51.551 # compares by a factor of 3.
2025-07-01 05:45:51.566 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:51.577 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:51.590 # of the computation is cached by cruncher
2025-07-01 05:45:51.602 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:51.611 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:51.619 cruncher.ratio() > best_ratio:
2025-07-01 05:45:51.626 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:51.633 if best_ratio < cutoff:
2025-07-01 05:45:51.640 # no non-identical "pretty close" pair
2025-07-01 05:45:51.646 if eqi is None:
2025-07-01 05:45:51.653 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:51.660 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:51.667 return
2025-07-01 05:45:51.681 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:51.693 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:51.706 else:
2025-07-01 05:45:51.717 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:51.730 eqi = None
2025-07-01 05:45:51.741
2025-07-01 05:45:51.754 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:51.768 # identical
2025-07-01 05:45:51.777
2025-07-01 05:45:51.788 # pump out diffs from before the synch point
2025-07-01 05:45:51.799 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:51.806
2025-07-01 05:45:51.813 # do intraline marking on the synch pair
2025-07-01 05:45:51.820 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:51.826 if eqi is None:
2025-07-01 05:45:51.832 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:51.838 atags = btags = ""
2025-07-01 05:45:51.848 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:51.858 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:51.865 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:51.871 if tag == 'replace':
2025-07-01 05:45:51.879 atags += '^' * la
2025-07-01 05:45:51.890 btags += '^' * lb
2025-07-01 05:45:51.898 elif tag == 'delete':
2025-07-01 05:45:51.905 atags += '-' * la
2025-07-01 05:45:51.912 elif tag == 'insert':
2025-07-01 05:45:51.918 btags += '+' * lb
2025-07-01 05:45:51.930 elif tag == 'equal':
2025-07-01 05:45:51.942 atags += ' ' * la
2025-07-01 05:45:51.952 btags += ' ' * lb
2025-07-01 05:45:51.961 else:
2025-07-01 05:45:51.968 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:51.973 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:51.984 else:
2025-07-01 05:45:51.993 # the synch pair is identical
2025-07-01 05:45:52.004 yield ' ' + aelt
2025-07-01 05:45:52.029
2025-07-01 05:45:52.041 # pump out diffs from after the synch point
2025-07-01 05:45:52.053 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:52.062
2025-07-01 05:45:52.070 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:52.079 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:52.090
2025-07-01 05:45:52.099 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:52.108 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:52.115 alo = 122, ahi = 1101
2025-07-01 05:45:52.126 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:52.134 blo = 122, bhi = 1101
2025-07-01 05:45:52.148
2025-07-01 05:45:52.160 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:52.168 g = []
2025-07-01 05:45:52.176 if alo < ahi:
2025-07-01 05:45:52.184 if blo < bhi:
2025-07-01 05:45:52.191 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:52.202 else:
2025-07-01 05:45:52.211 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:52.218 elif blo < bhi:
2025-07-01 05:45:52.225 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:52.232
2025-07-01 05:45:52.238 > yield from g
2025-07-01 05:45:52.245
2025-07-01 05:45:52.252 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:52.259 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:52.267
2025-07-01 05:45:52.278 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:52.288 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:52.295 alo = 122, ahi = 1101
2025-07-01 05:45:52.302 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:52.311 blo = 122, bhi = 1101
2025-07-01 05:45:52.323
2025-07-01 05:45:52.331 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:52.338 r"""
2025-07-01 05:45:52.349 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:52.356 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:52.364 synch point, and intraline difference marking is done on the
2025-07-01 05:45:52.372 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:52.379
2025-07-01 05:45:52.390 Example:
2025-07-01 05:45:52.398
2025-07-01 05:45:52.405 >>> d = Differ()
2025-07-01 05:45:52.412 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:52.421 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:52.434 >>> print(''.join(results), end="")
2025-07-01 05:45:52.444 - abcDefghiJkl
2025-07-01 05:45:52.460 + abcdefGhijkl
2025-07-01 05:45:52.475 """
2025-07-01 05:45:52.486
2025-07-01 05:45:52.499 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:52.510 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:52.519 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:52.527 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:52.535 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:52.546
2025-07-01 05:45:52.556 # search for the pair that matches best without being identical
2025-07-01 05:45:52.564 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:52.575 # on junk -- unless we have to)
2025-07-01 05:45:52.581 for j in range(blo, bhi):
2025-07-01 05:45:52.587 bj = b[j]
2025-07-01 05:45:52.598 cruncher.set_seq2(bj)
2025-07-01 05:45:52.605 for i in range(alo, ahi):
2025-07-01 05:45:52.611 ai = a[i]
2025-07-01 05:45:52.618 if ai == bj:
2025-07-01 05:45:52.625 if eqi is None:
2025-07-01 05:45:52.632 eqi, eqj = i, j
2025-07-01 05:45:52.642 continue
2025-07-01 05:45:52.650 cruncher.set_seq1(ai)
2025-07-01 05:45:52.656 # computing similarity is expensive, so use the quick
2025-07-01 05:45:52.662 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:52.668 # compares by a factor of 3.
2025-07-01 05:45:52.674 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:52.681 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:52.689 # of the computation is cached by cruncher
2025-07-01 05:45:52.698 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:52.705 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:52.711 cruncher.ratio() > best_ratio:
2025-07-01 05:45:52.716 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:52.721 if best_ratio < cutoff:
2025-07-01 05:45:52.726 # no non-identical "pretty close" pair
2025-07-01 05:45:52.730 if eqi is None:
2025-07-01 05:45:52.735 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:52.740 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:52.746 return
2025-07-01 05:45:52.752 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:52.758 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:52.766 else:
2025-07-01 05:45:52.774 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:52.782 eqi = None
2025-07-01 05:45:52.791
2025-07-01 05:45:52.799 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:52.805 # identical
2025-07-01 05:45:52.812
2025-07-01 05:45:52.824 # pump out diffs from before the synch point
2025-07-01 05:45:52.834 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:52.842
2025-07-01 05:45:52.849 # do intraline marking on the synch pair
2025-07-01 05:45:52.856 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:52.862 if eqi is None:
2025-07-01 05:45:52.867 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:52.872 atags = btags = ""
2025-07-01 05:45:52.877 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:52.883 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:52.889 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:52.895 if tag == 'replace':
2025-07-01 05:45:52.901 atags += '^' * la
2025-07-01 05:45:52.907 btags += '^' * lb
2025-07-01 05:45:52.915 elif tag == 'delete':
2025-07-01 05:45:52.922 atags += '-' * la
2025-07-01 05:45:52.932 elif tag == 'insert':
2025-07-01 05:45:52.943 btags += '+' * lb
2025-07-01 05:45:52.949 elif tag == 'equal':
2025-07-01 05:45:52.956 atags += ' ' * la
2025-07-01 05:45:52.963 btags += ' ' * lb
2025-07-01 05:45:52.970 else:
2025-07-01 05:45:52.978 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:52.989 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:52.998 else:
2025-07-01 05:45:53.010 # the synch pair is identical
2025-07-01 05:45:53.018 yield ' ' + aelt
2025-07-01 05:45:53.026
2025-07-01 05:45:53.033 # pump out diffs from after the synch point
2025-07-01 05:45:53.040 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:53.046
2025-07-01 05:45:53.051 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:53.056 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:53.060
2025-07-01 05:45:53.065 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:53.070 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:53.076 alo = 123, ahi = 1101
2025-07-01 05:45:53.083 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:53.089 blo = 123, bhi = 1101
2025-07-01 05:45:53.096
2025-07-01 05:45:53.103 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:53.111 g = []
2025-07-01 05:45:53.118 if alo < ahi:
2025-07-01 05:45:53.124 if blo < bhi:
2025-07-01 05:45:53.132 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:53.139 else:
2025-07-01 05:45:53.146 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:53.159 elif blo < bhi:
2025-07-01 05:45:53.169 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:53.182
2025-07-01 05:45:53.191 > yield from g
2025-07-01 05:45:53.199
2025-07-01 05:45:53.208 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:53.219 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:53.228
2025-07-01 05:45:53.235 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:53.244 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:53.251 alo = 123, ahi = 1101
2025-07-01 05:45:53.259 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:53.264 blo = 123, bhi = 1101
2025-07-01 05:45:53.270
2025-07-01 05:45:53.276 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:53.281 r"""
2025-07-01 05:45:53.288 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:53.294 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:53.300 synch point, and intraline difference marking is done on the
2025-07-01 05:45:53.306 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:53.316
2025-07-01 05:45:53.326 Example:
2025-07-01 05:45:53.333
2025-07-01 05:45:53.339 >>> d = Differ()
2025-07-01 05:45:53.345 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:53.350 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:53.354 >>> print(''.join(results), end="")
2025-07-01 05:45:53.359 - abcDefghiJkl
2025-07-01 05:45:53.385 + abcdefGhijkl
2025-07-01 05:45:53.400 """
2025-07-01 05:45:53.406
2025-07-01 05:45:53.413 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:53.420 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:53.427 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:53.434 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:53.446 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:53.458
2025-07-01 05:45:53.471 # search for the pair that matches best without being identical
2025-07-01 05:45:53.482 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:53.491 # on junk -- unless we have to)
2025-07-01 05:45:53.504 for j in range(blo, bhi):
2025-07-01 05:45:53.514 bj = b[j]
2025-07-01 05:45:53.523 cruncher.set_seq2(bj)
2025-07-01 05:45:53.534 for i in range(alo, ahi):
2025-07-01 05:45:53.545 ai = a[i]
2025-07-01 05:45:53.558 if ai == bj:
2025-07-01 05:45:53.569 if eqi is None:
2025-07-01 05:45:53.581 eqi, eqj = i, j
2025-07-01 05:45:53.594 continue
2025-07-01 05:45:53.602 cruncher.set_seq1(ai)
2025-07-01 05:45:53.611 # computing similarity is expensive, so use the quick
2025-07-01 05:45:53.622 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:53.632 # compares by a factor of 3.
2025-07-01 05:45:53.642 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:53.653 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:53.664 # of the computation is cached by cruncher
2025-07-01 05:45:53.673 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:53.680 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:53.685 cruncher.ratio() > best_ratio:
2025-07-01 05:45:53.690 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:53.695 if best_ratio < cutoff:
2025-07-01 05:45:53.699 # no non-identical "pretty close" pair
2025-07-01 05:45:53.704 if eqi is None:
2025-07-01 05:45:53.708 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:53.713 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:53.717 return
2025-07-01 05:45:53.723 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:53.728 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:53.732 else:
2025-07-01 05:45:53.737 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:53.742 eqi = None
2025-07-01 05:45:53.747
2025-07-01 05:45:53.752 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:53.758 # identical
2025-07-01 05:45:53.763
2025-07-01 05:45:53.771 # pump out diffs from before the synch point
2025-07-01 05:45:53.779 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:53.785
2025-07-01 05:45:53.791 # do intraline marking on the synch pair
2025-07-01 05:45:53.797 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:53.805 if eqi is None:
2025-07-01 05:45:53.813 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:53.822 atags = btags = ""
2025-07-01 05:45:53.834 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:53.845 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:53.852 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:53.859 if tag == 'replace':
2025-07-01 05:45:53.865 atags += '^' * la
2025-07-01 05:45:53.874 btags += '^' * lb
2025-07-01 05:45:53.881 elif tag == 'delete':
2025-07-01 05:45:53.894 atags += '-' * la
2025-07-01 05:45:53.904 elif tag == 'insert':
2025-07-01 05:45:53.913 btags += '+' * lb
2025-07-01 05:45:53.921 elif tag == 'equal':
2025-07-01 05:45:53.926 atags += ' ' * la
2025-07-01 05:45:53.931 btags += ' ' * lb
2025-07-01 05:45:53.936 else:
2025-07-01 05:45:53.940 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:53.945 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:53.951 else:
2025-07-01 05:45:53.957 # the synch pair is identical
2025-07-01 05:45:53.973 yield ' ' + aelt
2025-07-01 05:45:53.979
2025-07-01 05:45:53.986 # pump out diffs from after the synch point
2025-07-01 05:45:53.994 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:54.000
2025-07-01 05:45:54.006 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:54.011 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:54.016
2025-07-01 05:45:54.022 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:54.028 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:54.034 alo = 124, ahi = 1101
2025-07-01 05:45:54.043 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:54.054 blo = 124, bhi = 1101
2025-07-01 05:45:54.065
2025-07-01 05:45:54.075 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:54.084 g = []
2025-07-01 05:45:54.091 if alo < ahi:
2025-07-01 05:45:54.099 if blo < bhi:
2025-07-01 05:45:54.111 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:54.119 else:
2025-07-01 05:45:54.125 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:54.130 elif blo < bhi:
2025-07-01 05:45:54.137 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:54.146
2025-07-01 05:45:54.152 > yield from g
2025-07-01 05:45:54.166
2025-07-01 05:45:54.176 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:54.185 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:54.192
2025-07-01 05:45:54.199 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:54.206 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:54.219 alo = 124, ahi = 1101
2025-07-01 05:45:54.229 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:54.236 blo = 124, bhi = 1101
2025-07-01 05:45:54.242
2025-07-01 05:45:54.253 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:54.261 r"""
2025-07-01 05:45:54.268 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:54.274 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:54.280 synch point, and intraline difference marking is done on the
2025-07-01 05:45:54.286 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:54.291
2025-07-01 05:45:54.298 Example:
2025-07-01 05:45:54.309
2025-07-01 05:45:54.318 >>> d = Differ()
2025-07-01 05:45:54.325 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:54.331 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:54.340 >>> print(''.join(results), end="")
2025-07-01 05:45:54.348 - abcDefghiJkl
2025-07-01 05:45:54.360 + abcdefGhijkl
2025-07-01 05:45:54.372 """
2025-07-01 05:45:54.377
2025-07-01 05:45:54.384 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:54.390 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:54.396 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:54.402 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:54.408 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:54.414
2025-07-01 05:45:54.424 # search for the pair that matches best without being identical
2025-07-01 05:45:54.435 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:54.447 # on junk -- unless we have to)
2025-07-01 05:45:54.460 for j in range(blo, bhi):
2025-07-01 05:45:54.472 bj = b[j]
2025-07-01 05:45:54.483 cruncher.set_seq2(bj)
2025-07-01 05:45:54.491 for i in range(alo, ahi):
2025-07-01 05:45:54.498 ai = a[i]
2025-07-01 05:45:54.504 if ai == bj:
2025-07-01 05:45:54.517 if eqi is None:
2025-07-01 05:45:54.530 eqi, eqj = i, j
2025-07-01 05:45:54.539 continue
2025-07-01 05:45:54.551 cruncher.set_seq1(ai)
2025-07-01 05:45:54.560 # computing similarity is expensive, so use the quick
2025-07-01 05:45:54.568 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:54.574 # compares by a factor of 3.
2025-07-01 05:45:54.586 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:54.596 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:54.605 # of the computation is cached by cruncher
2025-07-01 05:45:54.615 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:54.625 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:54.633 cruncher.ratio() > best_ratio:
2025-07-01 05:45:54.644 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:54.656 if best_ratio < cutoff:
2025-07-01 05:45:54.667 # no non-identical "pretty close" pair
2025-07-01 05:45:54.676 if eqi is None:
2025-07-01 05:45:54.683 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:54.696 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:54.708 return
2025-07-01 05:45:54.719 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:54.728 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:54.735 else:
2025-07-01 05:45:54.743 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:54.755 eqi = None
2025-07-01 05:45:54.765
2025-07-01 05:45:54.776 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:54.788 # identical
2025-07-01 05:45:54.801
2025-07-01 05:45:54.814 # pump out diffs from before the synch point
2025-07-01 05:45:54.823 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:54.830
2025-07-01 05:45:54.840 # do intraline marking on the synch pair
2025-07-01 05:45:54.849 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:54.862 if eqi is None:
2025-07-01 05:45:54.871 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:54.879 atags = btags = ""
2025-07-01 05:45:54.887 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:54.899 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:54.910 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:54.922 if tag == 'replace':
2025-07-01 05:45:54.931 atags += '^' * la
2025-07-01 05:45:54.941 btags += '^' * lb
2025-07-01 05:45:54.956 elif tag == 'delete':
2025-07-01 05:45:54.965 atags += '-' * la
2025-07-01 05:45:54.973 elif tag == 'insert':
2025-07-01 05:45:54.980 btags += '+' * lb
2025-07-01 05:45:54.986 elif tag == 'equal':
2025-07-01 05:45:54.992 atags += ' ' * la
2025-07-01 05:45:54.998 btags += ' ' * lb
2025-07-01 05:45:55.004 else:
2025-07-01 05:45:55.013 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:55.021 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:55.027 else:
2025-07-01 05:45:55.033 # the synch pair is identical
2025-07-01 05:45:55.038 yield ' ' + aelt
2025-07-01 05:45:55.043
2025-07-01 05:45:55.048 # pump out diffs from after the synch point
2025-07-01 05:45:55.053 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:55.058
2025-07-01 05:45:55.063 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:55.069 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:55.076
2025-07-01 05:45:55.082 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:55.088 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:55.094 alo = 125, ahi = 1101
2025-07-01 05:45:55.102 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:55.113 blo = 125, bhi = 1101
2025-07-01 05:45:55.125
2025-07-01 05:45:55.135 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:55.144 g = []
2025-07-01 05:45:55.154 if alo < ahi:
2025-07-01 05:45:55.162 if blo < bhi:
2025-07-01 05:45:55.169 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:55.175 else:
2025-07-01 05:45:55.182 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:55.187 elif blo < bhi:
2025-07-01 05:45:55.195 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:55.208
2025-07-01 05:45:55.216 > yield from g
2025-07-01 05:45:55.223
2025-07-01 05:45:55.229 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:55.236 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:55.242
2025-07-01 05:45:55.248 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:55.255 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:55.260 alo = 125, ahi = 1101
2025-07-01 05:45:55.272 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:55.282 blo = 125, bhi = 1101
2025-07-01 05:45:55.291
2025-07-01 05:45:55.301 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:55.312 r"""
2025-07-01 05:45:55.324 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:55.335 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:55.345 synch point, and intraline difference marking is done on the
2025-07-01 05:45:55.352 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:55.359
2025-07-01 05:45:55.364 Example:
2025-07-01 05:45:55.370
2025-07-01 05:45:55.377 >>> d = Differ()
2025-07-01 05:45:55.385 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:55.391 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:55.396 >>> print(''.join(results), end="")
2025-07-01 05:45:55.401 - abcDefghiJkl
2025-07-01 05:45:55.417 + abcdefGhijkl
2025-07-01 05:45:55.438 """
2025-07-01 05:45:55.447
2025-07-01 05:45:55.455 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:55.462 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:55.468 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:55.474 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:55.480 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:55.485
2025-07-01 05:45:55.496 # search for the pair that matches best without being identical
2025-07-01 05:45:55.505 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:55.513 # on junk -- unless we have to)
2025-07-01 05:45:55.519 for j in range(blo, bhi):
2025-07-01 05:45:55.525 bj = b[j]
2025-07-01 05:45:55.533 cruncher.set_seq2(bj)
2025-07-01 05:45:55.544 for i in range(alo, ahi):
2025-07-01 05:45:55.553 ai = a[i]
2025-07-01 05:45:55.560 if ai == bj:
2025-07-01 05:45:55.569 if eqi is None:
2025-07-01 05:45:55.584 eqi, eqj = i, j
2025-07-01 05:45:55.594 continue
2025-07-01 05:45:55.605 cruncher.set_seq1(ai)
2025-07-01 05:45:55.617 # computing similarity is expensive, so use the quick
2025-07-01 05:45:55.630 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:55.641 # compares by a factor of 3.
2025-07-01 05:45:55.650 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:55.661 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:55.673 # of the computation is cached by cruncher
2025-07-01 05:45:55.683 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:55.691 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:55.698 cruncher.ratio() > best_ratio:
2025-07-01 05:45:55.711 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:55.721 if best_ratio < cutoff:
2025-07-01 05:45:55.731 # no non-identical "pretty close" pair
2025-07-01 05:45:55.742 if eqi is None:
2025-07-01 05:45:55.752 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:55.760 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:55.767 return
2025-07-01 05:45:55.774 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:55.785 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:55.795 else:
2025-07-01 05:45:55.807 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:55.816 eqi = None
2025-07-01 05:45:55.824
2025-07-01 05:45:55.831 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:55.839 # identical
2025-07-01 05:45:55.849
2025-07-01 05:45:55.859 # pump out diffs from before the synch point
2025-07-01 05:45:55.870 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:55.882
2025-07-01 05:45:55.890 # do intraline marking on the synch pair
2025-07-01 05:45:55.897 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:55.903 if eqi is None:
2025-07-01 05:45:55.910 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:55.922 atags = btags = ""
2025-07-01 05:45:55.933 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:55.942 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:55.950 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:55.960 if tag == 'replace':
2025-07-01 05:45:55.970 atags += '^' * la
2025-07-01 05:45:55.982 btags += '^' * lb
2025-07-01 05:45:55.993 elif tag == 'delete':
2025-07-01 05:45:56.000 atags += '-' * la
2025-07-01 05:45:56.007 elif tag == 'insert':
2025-07-01 05:45:56.014 btags += '+' * lb
2025-07-01 05:45:56.024 elif tag == 'equal':
2025-07-01 05:45:56.034 atags += ' ' * la
2025-07-01 05:45:56.041 btags += ' ' * lb
2025-07-01 05:45:56.049 else:
2025-07-01 05:45:56.056 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:56.063 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:56.069 else:
2025-07-01 05:45:56.075 # the synch pair is identical
2025-07-01 05:45:56.081 yield ' ' + aelt
2025-07-01 05:45:56.086
2025-07-01 05:45:56.091 # pump out diffs from after the synch point
2025-07-01 05:45:56.096 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:56.100
2025-07-01 05:45:56.106 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:56.116 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:56.125
2025-07-01 05:45:56.132 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:56.139 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:56.144 alo = 126, ahi = 1101
2025-07-01 05:45:56.155 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:56.167 blo = 126, bhi = 1101
2025-07-01 05:45:56.177
2025-07-01 05:45:56.185 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:56.191 g = []
2025-07-01 05:45:56.199 if alo < ahi:
2025-07-01 05:45:56.204 if blo < bhi:
2025-07-01 05:45:56.211 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:56.217 else:
2025-07-01 05:45:56.223 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:56.235 elif blo < bhi:
2025-07-01 05:45:56.242 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:56.249
2025-07-01 05:45:56.256 > yield from g
2025-07-01 05:45:56.262
2025-07-01 05:45:56.272 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:56.283 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:56.291
2025-07-01 05:45:56.299 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:56.312 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:56.321 alo = 126, ahi = 1101
2025-07-01 05:45:56.329 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:56.342 blo = 126, bhi = 1101
2025-07-01 05:45:56.353
2025-07-01 05:45:56.360 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:56.366 r"""
2025-07-01 05:45:56.373 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:56.379 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:56.387 synch point, and intraline difference marking is done on the
2025-07-01 05:45:56.398 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:56.408
2025-07-01 05:45:56.417 Example:
2025-07-01 05:45:56.423
2025-07-01 05:45:56.431 >>> d = Differ()
2025-07-01 05:45:56.442 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:56.450 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:56.457 >>> print(''.join(results), end="")
2025-07-01 05:45:56.464 - abcDefghiJkl
2025-07-01 05:45:56.478 + abcdefGhijkl
2025-07-01 05:45:56.491 """
2025-07-01 05:45:56.499
2025-07-01 05:45:56.510 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:56.521 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:56.531 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:56.545 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:56.552 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:56.564
2025-07-01 05:45:56.572 # search for the pair that matches best without being identical
2025-07-01 05:45:56.578 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:56.590 # on junk -- unless we have to)
2025-07-01 05:45:56.600 for j in range(blo, bhi):
2025-07-01 05:45:56.607 bj = b[j]
2025-07-01 05:45:56.612 cruncher.set_seq2(bj)
2025-07-01 05:45:56.620 for i in range(alo, ahi):
2025-07-01 05:45:56.629 ai = a[i]
2025-07-01 05:45:56.636 if ai == bj:
2025-07-01 05:45:56.647 if eqi is None:
2025-07-01 05:45:56.659 eqi, eqj = i, j
2025-07-01 05:45:56.669 continue
2025-07-01 05:45:56.682 cruncher.set_seq1(ai)
2025-07-01 05:45:56.694 # computing similarity is expensive, so use the quick
2025-07-01 05:45:56.702 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:56.715 # compares by a factor of 3.
2025-07-01 05:45:56.724 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:56.733 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:56.740 # of the computation is cached by cruncher
2025-07-01 05:45:56.748 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:56.761 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:56.772 cruncher.ratio() > best_ratio:
2025-07-01 05:45:56.783 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:56.795 if best_ratio < cutoff:
2025-07-01 05:45:56.804 # no non-identical "pretty close" pair
2025-07-01 05:45:56.812 if eqi is None:
2025-07-01 05:45:56.826 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:56.839 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:56.846 return
2025-07-01 05:45:56.853 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:56.859 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:56.864 else:
2025-07-01 05:45:56.869 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:56.874 eqi = None
2025-07-01 05:45:56.878
2025-07-01 05:45:56.884 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:56.890 # identical
2025-07-01 05:45:56.895
2025-07-01 05:45:56.904 # pump out diffs from before the synch point
2025-07-01 05:45:56.911 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:56.916
2025-07-01 05:45:56.922 # do intraline marking on the synch pair
2025-07-01 05:45:56.932 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:56.941 if eqi is None:
2025-07-01 05:45:56.950 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:56.961 atags = btags = ""
2025-07-01 05:45:56.969 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:56.975 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:56.982 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:56.988 if tag == 'replace':
2025-07-01 05:45:56.994 atags += '^' * la
2025-07-01 05:45:57.000 btags += '^' * lb
2025-07-01 05:45:57.007 elif tag == 'delete':
2025-07-01 05:45:57.013 atags += '-' * la
2025-07-01 05:45:57.018 elif tag == 'insert':
2025-07-01 05:45:57.023 btags += '+' * lb
2025-07-01 05:45:57.029 elif tag == 'equal':
2025-07-01 05:45:57.035 atags += ' ' * la
2025-07-01 05:45:57.043 btags += ' ' * lb
2025-07-01 05:45:57.052 else:
2025-07-01 05:45:57.064 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:57.075 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:57.083 else:
2025-07-01 05:45:57.089 # the synch pair is identical
2025-07-01 05:45:57.094 yield ' ' + aelt
2025-07-01 05:45:57.104
2025-07-01 05:45:57.116 # pump out diffs from after the synch point
2025-07-01 05:45:57.130 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:57.140
2025-07-01 05:45:57.148 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:57.155 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:57.162
2025-07-01 05:45:57.173 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:57.182 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:57.189 alo = 127, ahi = 1101
2025-07-01 05:45:57.196 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:57.204 blo = 127, bhi = 1101
2025-07-01 05:45:57.211
2025-07-01 05:45:57.218 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:57.228 g = []
2025-07-01 05:45:57.239 if alo < ahi:
2025-07-01 05:45:57.250 if blo < bhi:
2025-07-01 05:45:57.260 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:57.272 else:
2025-07-01 05:45:57.285 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:57.297 elif blo < bhi:
2025-07-01 05:45:57.307 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:57.318
2025-07-01 05:45:57.327 > yield from g
2025-07-01 05:45:57.333
2025-07-01 05:45:57.339 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:57.346 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:57.358
2025-07-01 05:45:57.367 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:57.375 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:57.380 alo = 127, ahi = 1101
2025-07-01 05:45:57.387 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:57.401 blo = 127, bhi = 1101
2025-07-01 05:45:57.413
2025-07-01 05:45:57.419 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:57.425 r"""
2025-07-01 05:45:57.433 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:57.443 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:57.450 synch point, and intraline difference marking is done on the
2025-07-01 05:45:57.457 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:57.464
2025-07-01 05:45:57.471 Example:
2025-07-01 05:45:57.477
2025-07-01 05:45:57.484 >>> d = Differ()
2025-07-01 05:45:57.491 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:57.499 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:57.507 >>> print(''.join(results), end="")
2025-07-01 05:45:57.515 - abcDefghiJkl
2025-07-01 05:45:57.533 + abcdefGhijkl
2025-07-01 05:45:57.547 """
2025-07-01 05:45:57.552
2025-07-01 05:45:57.566 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:57.575 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:57.583 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:57.590 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:57.601 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:57.610
2025-07-01 05:45:57.618 # search for the pair that matches best without being identical
2025-07-01 05:45:57.626 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:57.639 # on junk -- unless we have to)
2025-07-01 05:45:57.649 for j in range(blo, bhi):
2025-07-01 05:45:57.661 bj = b[j]
2025-07-01 05:45:57.671 cruncher.set_seq2(bj)
2025-07-01 05:45:57.679 for i in range(alo, ahi):
2025-07-01 05:45:57.686 ai = a[i]
2025-07-01 05:45:57.693 if ai == bj:
2025-07-01 05:45:57.699 if eqi is None:
2025-07-01 05:45:57.705 eqi, eqj = i, j
2025-07-01 05:45:57.711 continue
2025-07-01 05:45:57.717 cruncher.set_seq1(ai)
2025-07-01 05:45:57.723 # computing similarity is expensive, so use the quick
2025-07-01 05:45:57.728 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:57.739 # compares by a factor of 3.
2025-07-01 05:45:57.749 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:57.756 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:57.762 # of the computation is cached by cruncher
2025-07-01 05:45:57.773 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:57.782 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:57.790 cruncher.ratio() > best_ratio:
2025-07-01 05:45:57.796 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:57.801 if best_ratio < cutoff:
2025-07-01 05:45:57.807 # no non-identical "pretty close" pair
2025-07-01 05:45:57.813 if eqi is None:
2025-07-01 05:45:57.818 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:57.824 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:57.830 return
2025-07-01 05:45:57.835 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:57.842 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:57.847 else:
2025-07-01 05:45:57.853 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:57.858 eqi = None
2025-07-01 05:45:57.868
2025-07-01 05:45:57.877 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:57.884 # identical
2025-07-01 05:45:57.891
2025-07-01 05:45:57.902 # pump out diffs from before the synch point
2025-07-01 05:45:57.912 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:57.918
2025-07-01 05:45:57.924 # do intraline marking on the synch pair
2025-07-01 05:45:57.928 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:57.935 if eqi is None:
2025-07-01 05:45:57.942 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:57.947 atags = btags = ""
2025-07-01 05:45:57.952 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:57.956 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:57.962 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:57.966 if tag == 'replace':
2025-07-01 05:45:57.971 atags += '^' * la
2025-07-01 05:45:57.975 btags += '^' * lb
2025-07-01 05:45:57.980 elif tag == 'delete':
2025-07-01 05:45:57.985 atags += '-' * la
2025-07-01 05:45:57.991 elif tag == 'insert':
2025-07-01 05:45:57.995 btags += '+' * lb
2025-07-01 05:45:58.000 elif tag == 'equal':
2025-07-01 05:45:58.009 atags += ' ' * la
2025-07-01 05:45:58.016 btags += ' ' * lb
2025-07-01 05:45:58.026 else:
2025-07-01 05:45:58.035 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:58.049 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:58.061 else:
2025-07-01 05:45:58.067 # the synch pair is identical
2025-07-01 05:45:58.079 yield ' ' + aelt
2025-07-01 05:45:58.091
2025-07-01 05:45:58.102 # pump out diffs from after the synch point
2025-07-01 05:45:58.113 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:58.120
2025-07-01 05:45:58.128 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:58.134 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:58.140
2025-07-01 05:45:58.147 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:58.159 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:58.168 alo = 128, ahi = 1101
2025-07-01 05:45:58.178 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:58.188 blo = 128, bhi = 1101
2025-07-01 05:45:58.195
2025-07-01 05:45:58.203 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:58.209 g = []
2025-07-01 05:45:58.222 if alo < ahi:
2025-07-01 05:45:58.233 if blo < bhi:
2025-07-01 05:45:58.242 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:58.253 else:
2025-07-01 05:45:58.266 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:58.277 elif blo < bhi:
2025-07-01 05:45:58.288 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:58.299
2025-07-01 05:45:58.308 > yield from g
2025-07-01 05:45:58.316
2025-07-01 05:45:58.322 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:58.329 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:58.334
2025-07-01 05:45:58.341 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:58.349 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:58.354 alo = 128, ahi = 1101
2025-07-01 05:45:58.361 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:58.367 blo = 128, bhi = 1101
2025-07-01 05:45:58.372
2025-07-01 05:45:58.378 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:58.384 r"""
2025-07-01 05:45:58.390 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:58.400 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:58.410 synch point, and intraline difference marking is done on the
2025-07-01 05:45:58.417 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:58.423
2025-07-01 05:45:58.431 Example:
2025-07-01 05:45:58.440
2025-07-01 05:45:58.449 >>> d = Differ()
2025-07-01 05:45:58.456 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:58.462 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:58.473 >>> print(''.join(results), end="")
2025-07-01 05:45:58.482 - abcDefghiJkl
2025-07-01 05:45:58.496 + abcdefGhijkl
2025-07-01 05:45:58.515 """
2025-07-01 05:45:58.527
2025-07-01 05:45:58.536 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:58.544 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:58.555 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:58.566 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:58.578 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:58.588
2025-07-01 05:45:58.599 # search for the pair that matches best without being identical
2025-07-01 05:45:58.607 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:58.614 # on junk -- unless we have to)
2025-07-01 05:45:58.621 for j in range(blo, bhi):
2025-07-01 05:45:58.627 bj = b[j]
2025-07-01 05:45:58.632 cruncher.set_seq2(bj)
2025-07-01 05:45:58.638 for i in range(alo, ahi):
2025-07-01 05:45:58.650 ai = a[i]
2025-07-01 05:45:58.661 if ai == bj:
2025-07-01 05:45:58.669 if eqi is None:
2025-07-01 05:45:58.676 eqi, eqj = i, j
2025-07-01 05:45:58.683 continue
2025-07-01 05:45:58.691 cruncher.set_seq1(ai)
2025-07-01 05:45:58.701 # computing similarity is expensive, so use the quick
2025-07-01 05:45:58.714 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:58.725 # compares by a factor of 3.
2025-07-01 05:45:58.736 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:58.746 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:58.754 # of the computation is cached by cruncher
2025-07-01 05:45:58.763 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:58.774 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:58.783 cruncher.ratio() > best_ratio:
2025-07-01 05:45:58.789 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:58.796 if best_ratio < cutoff:
2025-07-01 05:45:58.802 # no non-identical "pretty close" pair
2025-07-01 05:45:58.807 if eqi is None:
2025-07-01 05:45:58.813 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:58.820 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:58.825 return
2025-07-01 05:45:58.831 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:58.836 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:58.842 else:
2025-07-01 05:45:58.851 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:58.863 eqi = None
2025-07-01 05:45:58.875
2025-07-01 05:45:58.884 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:58.892 # identical
2025-07-01 05:45:58.899
2025-07-01 05:45:58.906 # pump out diffs from before the synch point
2025-07-01 05:45:58.912 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:58.919
2025-07-01 05:45:58.929 # do intraline marking on the synch pair
2025-07-01 05:45:58.938 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:58.946 if eqi is None:
2025-07-01 05:45:58.958 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:58.967 atags = btags = ""
2025-07-01 05:45:58.973 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:58.979 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:58.986 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:58.997 if tag == 'replace':
2025-07-01 05:45:59.007 atags += '^' * la
2025-07-01 05:45:59.016 btags += '^' * lb
2025-07-01 05:45:59.028 elif tag == 'delete':
2025-07-01 05:45:59.035 atags += '-' * la
2025-07-01 05:45:59.042 elif tag == 'insert':
2025-07-01 05:45:59.054 btags += '+' * lb
2025-07-01 05:45:59.062 elif tag == 'equal':
2025-07-01 05:45:59.068 atags += ' ' * la
2025-07-01 05:45:59.074 btags += ' ' * lb
2025-07-01 05:45:59.079 else:
2025-07-01 05:45:59.086 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:45:59.097 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:45:59.108 else:
2025-07-01 05:45:59.118 # the synch pair is identical
2025-07-01 05:45:59.126 yield ' ' + aelt
2025-07-01 05:45:59.137
2025-07-01 05:45:59.146 # pump out diffs from after the synch point
2025-07-01 05:45:59.156 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:45:59.167
2025-07-01 05:45:59.175 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:45:59.182 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:59.187
2025-07-01 05:45:59.193 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:59.201 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:59.209 alo = 129, ahi = 1101
2025-07-01 05:45:59.216 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:59.222 blo = 129, bhi = 1101
2025-07-01 05:45:59.227
2025-07-01 05:45:59.233 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:59.239 g = []
2025-07-01 05:45:59.245 if alo < ahi:
2025-07-01 05:45:59.251 if blo < bhi:
2025-07-01 05:45:59.257 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:59.264 else:
2025-07-01 05:45:59.271 g = self._dump('-', a, alo, ahi)
2025-07-01 05:45:59.277 elif blo < bhi:
2025-07-01 05:45:59.284 g = self._dump('+', b, blo, bhi)
2025-07-01 05:45:59.290
2025-07-01 05:45:59.296 > yield from g
2025-07-01 05:45:59.302
2025-07-01 05:45:59.308 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:45:59.314 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:45:59.320
2025-07-01 05:45:59.325 self = <difflib.Differ object at [hex]>
2025-07-01 05:45:59.331 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:45:59.337 alo = 129, ahi = 1101
2025-07-01 05:45:59.342 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:45:59.347 blo = 129, bhi = 1101
2025-07-01 05:45:59.355
2025-07-01 05:45:59.360 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:45:59.367 r"""
2025-07-01 05:45:59.375 When replacing one block of lines with another, search the blocks
2025-07-01 05:45:59.382 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:45:59.392 synch point, and intraline difference marking is done on the
2025-07-01 05:45:59.400 similar pair. Lots of work, but often worth it.
2025-07-01 05:45:59.407
2025-07-01 05:45:59.413 Example:
2025-07-01 05:45:59.419
2025-07-01 05:45:59.425 >>> d = Differ()
2025-07-01 05:45:59.431 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:45:59.438 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:45:59.446 >>> print(''.join(results), end="")
2025-07-01 05:45:59.454 - abcDefghiJkl
2025-07-01 05:45:59.467 + abcdefGhijkl
2025-07-01 05:45:59.487 """
2025-07-01 05:45:59.493
2025-07-01 05:45:59.499 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:45:59.507 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:45:59.515 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:45:59.521 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:45:59.528 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:45:59.533
2025-07-01 05:45:59.539 # search for the pair that matches best without being identical
2025-07-01 05:45:59.545 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:45:59.551 # on junk -- unless we have to)
2025-07-01 05:45:59.557 for j in range(blo, bhi):
2025-07-01 05:45:59.562 bj = b[j]
2025-07-01 05:45:59.568 cruncher.set_seq2(bj)
2025-07-01 05:45:59.574 for i in range(alo, ahi):
2025-07-01 05:45:59.580 ai = a[i]
2025-07-01 05:45:59.585 if ai == bj:
2025-07-01 05:45:59.597 if eqi is None:
2025-07-01 05:45:59.607 eqi, eqj = i, j
2025-07-01 05:45:59.615 continue
2025-07-01 05:45:59.622 cruncher.set_seq1(ai)
2025-07-01 05:45:59.629 # computing similarity is expensive, so use the quick
2025-07-01 05:45:59.642 # upper bounds first -- have seen this speed up messy
2025-07-01 05:45:59.652 # compares by a factor of 3.
2025-07-01 05:45:59.659 # note that ratio() is only expensive to compute the first
2025-07-01 05:45:59.668 # time it's called on a sequence pair; the expensive part
2025-07-01 05:45:59.678 # of the computation is cached by cruncher
2025-07-01 05:45:59.689 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:45:59.701 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:45:59.712 cruncher.ratio() > best_ratio:
2025-07-01 05:45:59.724 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:45:59.734 if best_ratio < cutoff:
2025-07-01 05:45:59.743 # no non-identical "pretty close" pair
2025-07-01 05:45:59.750 if eqi is None:
2025-07-01 05:45:59.756 # no identical pair either -- treat it as a straight replace
2025-07-01 05:45:59.761 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:45:59.766 return
2025-07-01 05:45:59.771 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:45:59.775 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:45:59.780 else:
2025-07-01 05:45:59.785 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:45:59.790 eqi = None
2025-07-01 05:45:59.796
2025-07-01 05:45:59.802 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:45:59.808 # identical
2025-07-01 05:45:59.820
2025-07-01 05:45:59.831 # pump out diffs from before the synch point
2025-07-01 05:45:59.841 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:45:59.850
2025-07-01 05:45:59.862 # do intraline marking on the synch pair
2025-07-01 05:45:59.873 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:45:59.884 if eqi is None:
2025-07-01 05:45:59.896 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:45:59.905 atags = btags = ""
2025-07-01 05:45:59.916 cruncher.set_seqs(aelt, belt)
2025-07-01 05:45:59.926 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:45:59.937 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:45:59.946 if tag == 'replace':
2025-07-01 05:45:59.954 atags += '^' * la
2025-07-01 05:45:59.960 btags += '^' * lb
2025-07-01 05:45:59.966 elif tag == 'delete':
2025-07-01 05:45:59.973 atags += '-' * la
2025-07-01 05:45:59.980 elif tag == 'insert':
2025-07-01 05:45:59.986 btags += '+' * lb
2025-07-01 05:45:59.999 elif tag == 'equal':
2025-07-01 05:46:00.009 atags += ' ' * la
2025-07-01 05:46:00.016 btags += ' ' * lb
2025-07-01 05:46:00.021 else:
2025-07-01 05:46:00.026 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:00.031 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:00.035 else:
2025-07-01 05:46:00.045 # the synch pair is identical
2025-07-01 05:46:00.052 yield ' ' + aelt
2025-07-01 05:46:00.058
2025-07-01 05:46:00.065 # pump out diffs from after the synch point
2025-07-01 05:46:00.071 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:00.076
2025-07-01 05:46:00.082 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:00.089 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:00.094
2025-07-01 05:46:00.099 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:00.104 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:00.108 alo = 130, ahi = 1101
2025-07-01 05:46:00.114 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:00.118 blo = 130, bhi = 1101
2025-07-01 05:46:00.123
2025-07-01 05:46:00.127 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:00.131 g = []
2025-07-01 05:46:00.136 if alo < ahi:
2025-07-01 05:46:00.140 if blo < bhi:
2025-07-01 05:46:00.145 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:00.149 else:
2025-07-01 05:46:00.153 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:00.157 elif blo < bhi:
2025-07-01 05:46:00.162 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:00.166
2025-07-01 05:46:00.170 > yield from g
2025-07-01 05:46:00.174
2025-07-01 05:46:00.179 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:00.183 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:00.189
2025-07-01 05:46:00.195 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:00.202 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:00.208 alo = 130, ahi = 1101
2025-07-01 05:46:00.220 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:00.231 blo = 130, bhi = 1101
2025-07-01 05:46:00.241
2025-07-01 05:46:00.250 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:00.259 r"""
2025-07-01 05:46:00.270 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:00.279 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:00.287 synch point, and intraline difference marking is done on the
2025-07-01 05:46:00.294 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:00.300
2025-07-01 05:46:00.307 Example:
2025-07-01 05:46:00.317
2025-07-01 05:46:00.325 >>> d = Differ()
2025-07-01 05:46:00.336 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:00.347 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:00.355 >>> print(''.join(results), end="")
2025-07-01 05:46:00.363 - abcDefghiJkl
2025-07-01 05:46:00.387 + abcdefGhijkl
2025-07-01 05:46:00.410 """
2025-07-01 05:46:00.420
2025-07-01 05:46:00.428 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:00.441 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:00.449 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:00.458 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:00.466 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:00.473
2025-07-01 05:46:00.480 # search for the pair that matches best without being identical
2025-07-01 05:46:00.487 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:00.497 # on junk -- unless we have to)
2025-07-01 05:46:00.508 for j in range(blo, bhi):
2025-07-01 05:46:00.516 bj = b[j]
2025-07-01 05:46:00.524 cruncher.set_seq2(bj)
2025-07-01 05:46:00.531 for i in range(alo, ahi):
2025-07-01 05:46:00.543 ai = a[i]
2025-07-01 05:46:00.552 if ai == bj:
2025-07-01 05:46:00.559 if eqi is None:
2025-07-01 05:46:00.566 eqi, eqj = i, j
2025-07-01 05:46:00.571 continue
2025-07-01 05:46:00.576 cruncher.set_seq1(ai)
2025-07-01 05:46:00.583 # computing similarity is expensive, so use the quick
2025-07-01 05:46:00.590 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:00.597 # compares by a factor of 3.
2025-07-01 05:46:00.603 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:00.608 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:00.613 # of the computation is cached by cruncher
2025-07-01 05:46:00.622 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:00.633 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:00.642 cruncher.ratio() > best_ratio:
2025-07-01 05:46:00.650 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:00.657 if best_ratio < cutoff:
2025-07-01 05:46:00.663 # no non-identical "pretty close" pair
2025-07-01 05:46:00.668 if eqi is None:
2025-07-01 05:46:00.673 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:00.682 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:00.694 return
2025-07-01 05:46:00.706 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:00.716 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:00.723 else:
2025-07-01 05:46:00.734 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:00.745 eqi = None
2025-07-01 05:46:00.756
2025-07-01 05:46:00.769 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:00.782 # identical
2025-07-01 05:46:00.792
2025-07-01 05:46:00.803 # pump out diffs from before the synch point
2025-07-01 05:46:00.813 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:00.825
2025-07-01 05:46:00.835 # do intraline marking on the synch pair
2025-07-01 05:46:00.843 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:00.851 if eqi is None:
2025-07-01 05:46:00.858 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:00.870 atags = btags = ""
2025-07-01 05:46:00.878 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:00.885 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:00.892 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:00.898 if tag == 'replace':
2025-07-01 05:46:00.903 atags += '^' * la
2025-07-01 05:46:00.909 btags += '^' * lb
2025-07-01 05:46:00.914 elif tag == 'delete':
2025-07-01 05:46:00.920 atags += '-' * la
2025-07-01 05:46:00.926 elif tag == 'insert':
2025-07-01 05:46:00.938 btags += '+' * lb
2025-07-01 05:46:00.948 elif tag == 'equal':
2025-07-01 05:46:00.956 atags += ' ' * la
2025-07-01 05:46:00.963 btags += ' ' * lb
2025-07-01 05:46:00.969 else:
2025-07-01 05:46:00.980 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:00.989 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:01.001 else:
2025-07-01 05:46:01.014 # the synch pair is identical
2025-07-01 05:46:01.025 yield ' ' + aelt
2025-07-01 05:46:01.038
2025-07-01 05:46:01.051 # pump out diffs from after the synch point
2025-07-01 05:46:01.063 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:01.072
2025-07-01 05:46:01.080 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:01.087 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:01.095
2025-07-01 05:46:01.106 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:01.117 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:01.127 alo = 131, ahi = 1101
2025-07-01 05:46:01.139 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:01.151 blo = 131, bhi = 1101
2025-07-01 05:46:01.159
2025-07-01 05:46:01.167 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:01.176 g = []
2025-07-01 05:46:01.186 if alo < ahi:
2025-07-01 05:46:01.196 if blo < bhi:
2025-07-01 05:46:01.204 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:01.210 else:
2025-07-01 05:46:01.218 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:01.226 elif blo < bhi:
2025-07-01 05:46:01.233 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:01.246
2025-07-01 05:46:01.256 > yield from g
2025-07-01 05:46:01.267
2025-07-01 05:46:01.274 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:01.282 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:01.293
2025-07-01 05:46:01.303 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:01.316 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:01.325 alo = 131, ahi = 1101
2025-07-01 05:46:01.334 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:01.341 blo = 131, bhi = 1101
2025-07-01 05:46:01.348
2025-07-01 05:46:01.356 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:01.366 r"""
2025-07-01 05:46:01.375 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:01.381 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:01.388 synch point, and intraline difference marking is done on the
2025-07-01 05:46:01.396 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:01.402
2025-07-01 05:46:01.408 Example:
2025-07-01 05:46:01.414
2025-07-01 05:46:01.421 >>> d = Differ()
2025-07-01 05:46:01.428 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:01.436 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:01.443 >>> print(''.join(results), end="")
2025-07-01 05:46:01.448 - abcDefghiJkl
2025-07-01 05:46:01.457 + abcdefGhijkl
2025-07-01 05:46:01.479 """
2025-07-01 05:46:01.488
2025-07-01 05:46:01.494 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:01.500 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:01.506 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:01.517 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:01.526 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:01.535
2025-07-01 05:46:01.543 # search for the pair that matches best without being identical
2025-07-01 05:46:01.551 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:01.558 # on junk -- unless we have to)
2025-07-01 05:46:01.569 for j in range(blo, bhi):
2025-07-01 05:46:01.580 bj = b[j]
2025-07-01 05:46:01.588 cruncher.set_seq2(bj)
2025-07-01 05:46:01.596 for i in range(alo, ahi):
2025-07-01 05:46:01.602 ai = a[i]
2025-07-01 05:46:01.610 if ai == bj:
2025-07-01 05:46:01.622 if eqi is None:
2025-07-01 05:46:01.633 eqi, eqj = i, j
2025-07-01 05:46:01.641 continue
2025-07-01 05:46:01.648 cruncher.set_seq1(ai)
2025-07-01 05:46:01.655 # computing similarity is expensive, so use the quick
2025-07-01 05:46:01.662 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:01.669 # compares by a factor of 3.
2025-07-01 05:46:01.682 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:01.692 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:01.700 # of the computation is cached by cruncher
2025-07-01 05:46:01.707 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:01.716 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:01.728 cruncher.ratio() > best_ratio:
2025-07-01 05:46:01.737 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:01.745 if best_ratio < cutoff:
2025-07-01 05:46:01.752 # no non-identical "pretty close" pair
2025-07-01 05:46:01.758 if eqi is None:
2025-07-01 05:46:01.764 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:01.770 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:01.776 return
2025-07-01 05:46:01.782 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:01.791 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:01.803 else:
2025-07-01 05:46:01.815 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:01.825 eqi = None
2025-07-01 05:46:01.832
2025-07-01 05:46:01.840 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:01.847 # identical
2025-07-01 05:46:01.854
2025-07-01 05:46:01.865 # pump out diffs from before the synch point
2025-07-01 05:46:01.879 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:01.889
2025-07-01 05:46:01.900 # do intraline marking on the synch pair
2025-07-01 05:46:01.910 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:01.918 if eqi is None:
2025-07-01 05:46:01.931 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:01.941 atags = btags = ""
2025-07-01 05:46:01.948 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:01.958 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:01.965 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:01.972 if tag == 'replace':
2025-07-01 05:46:01.979 atags += '^' * la
2025-07-01 05:46:01.986 btags += '^' * lb
2025-07-01 05:46:01.993 elif tag == 'delete':
2025-07-01 05:46:02.000 atags += '-' * la
2025-07-01 05:46:02.008 elif tag == 'insert':
2025-07-01 05:46:02.015 btags += '+' * lb
2025-07-01 05:46:02.021 elif tag == 'equal':
2025-07-01 05:46:02.027 atags += ' ' * la
2025-07-01 05:46:02.033 btags += ' ' * lb
2025-07-01 05:46:02.040 else:
2025-07-01 05:46:02.046 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:02.052 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:02.058 else:
2025-07-01 05:46:02.065 # the synch pair is identical
2025-07-01 05:46:02.071 yield ' ' + aelt
2025-07-01 05:46:02.077
2025-07-01 05:46:02.082 # pump out diffs from after the synch point
2025-07-01 05:46:02.088 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:02.095
2025-07-01 05:46:02.103 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:02.111 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:02.124
2025-07-01 05:46:02.132 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:02.141 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:02.147 alo = 132, ahi = 1101
2025-07-01 05:46:02.154 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:02.161 blo = 132, bhi = 1101
2025-07-01 05:46:02.168
2025-07-01 05:46:02.173 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:02.179 g = []
2025-07-01 05:46:02.183 if alo < ahi:
2025-07-01 05:46:02.188 if blo < bhi:
2025-07-01 05:46:02.194 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:02.206 else:
2025-07-01 05:46:02.214 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:02.223 elif blo < bhi:
2025-07-01 05:46:02.231 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:02.238
2025-07-01 05:46:02.249 > yield from g
2025-07-01 05:46:02.259
2025-07-01 05:46:02.268 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:02.276 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:02.281
2025-07-01 05:46:02.286 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:02.292 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:02.298 alo = 132, ahi = 1101
2025-07-01 05:46:02.306 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:02.317 blo = 132, bhi = 1101
2025-07-01 05:46:02.327
2025-07-01 05:46:02.336 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:02.342 r"""
2025-07-01 05:46:02.348 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:02.355 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:02.361 synch point, and intraline difference marking is done on the
2025-07-01 05:46:02.368 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:02.376
2025-07-01 05:46:02.389 Example:
2025-07-01 05:46:02.398
2025-07-01 05:46:02.406 >>> d = Differ()
2025-07-01 05:46:02.414 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:02.420 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:02.427 >>> print(''.join(results), end="")
2025-07-01 05:46:02.434 - abcDefghiJkl
2025-07-01 05:46:02.451 + abcdefGhijkl
2025-07-01 05:46:02.462 """
2025-07-01 05:46:02.467
2025-07-01 05:46:02.473 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:02.478 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:02.483 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:02.487 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:02.494 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:02.500
2025-07-01 05:46:02.506 # search for the pair that matches best without being identical
2025-07-01 05:46:02.512 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:02.519 # on junk -- unless we have to)
2025-07-01 05:46:02.526 for j in range(blo, bhi):
2025-07-01 05:46:02.537 bj = b[j]
2025-07-01 05:46:02.545 cruncher.set_seq2(bj)
2025-07-01 05:46:02.552 for i in range(alo, ahi):
2025-07-01 05:46:02.558 ai = a[i]
2025-07-01 05:46:02.563 if ai == bj:
2025-07-01 05:46:02.568 if eqi is None:
2025-07-01 05:46:02.574 eqi, eqj = i, j
2025-07-01 05:46:02.580 continue
2025-07-01 05:46:02.586 cruncher.set_seq1(ai)
2025-07-01 05:46:02.597 # computing similarity is expensive, so use the quick
2025-07-01 05:46:02.610 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:02.617 # compares by a factor of 3.
2025-07-01 05:46:02.624 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:02.631 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:02.637 # of the computation is cached by cruncher
2025-07-01 05:46:02.643 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:02.648 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:02.653 cruncher.ratio() > best_ratio:
2025-07-01 05:46:02.658 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:02.663 if best_ratio < cutoff:
2025-07-01 05:46:02.668 # no non-identical "pretty close" pair
2025-07-01 05:46:02.674 if eqi is None:
2025-07-01 05:46:02.680 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:02.686 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:02.691 return
2025-07-01 05:46:02.704 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:02.713 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:02.719 else:
2025-07-01 05:46:02.726 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:02.738 eqi = None
2025-07-01 05:46:02.748
2025-07-01 05:46:02.757 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:02.764 # identical
2025-07-01 05:46:02.769
2025-07-01 05:46:02.777 # pump out diffs from before the synch point
2025-07-01 05:46:02.788 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:02.800
2025-07-01 05:46:02.808 # do intraline marking on the synch pair
2025-07-01 05:46:02.815 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:02.823 if eqi is None:
2025-07-01 05:46:02.834 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:02.842 atags = btags = ""
2025-07-01 05:46:02.848 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:02.854 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:02.859 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:02.865 if tag == 'replace':
2025-07-01 05:46:02.871 atags += '^' * la
2025-07-01 05:46:02.876 btags += '^' * lb
2025-07-01 05:46:02.883 elif tag == 'delete':
2025-07-01 05:46:02.890 atags += '-' * la
2025-07-01 05:46:02.896 elif tag == 'insert':
2025-07-01 05:46:02.903 btags += '+' * lb
2025-07-01 05:46:02.910 elif tag == 'equal':
2025-07-01 05:46:02.917 atags += ' ' * la
2025-07-01 05:46:02.923 btags += ' ' * lb
2025-07-01 05:46:02.931 else:
2025-07-01 05:46:02.942 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:02.950 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:02.957 else:
2025-07-01 05:46:02.963 # the synch pair is identical
2025-07-01 05:46:02.968 yield ' ' + aelt
2025-07-01 05:46:02.974
2025-07-01 05:46:02.980 # pump out diffs from after the synch point
2025-07-01 05:46:02.987 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:02.995
2025-07-01 05:46:03.007 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:03.017 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:03.027
2025-07-01 05:46:03.038 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:03.048 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:03.055 alo = 133, ahi = 1101
2025-07-01 05:46:03.065 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:03.071 blo = 133, bhi = 1101
2025-07-01 05:46:03.076
2025-07-01 05:46:03.083 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:03.088 g = []
2025-07-01 05:46:03.094 if alo < ahi:
2025-07-01 05:46:03.099 if blo < bhi:
2025-07-01 05:46:03.107 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:03.118 else:
2025-07-01 05:46:03.126 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:03.134 elif blo < bhi:
2025-07-01 05:46:03.145 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:03.153
2025-07-01 05:46:03.160 > yield from g
2025-07-01 05:46:03.166
2025-07-01 05:46:03.172 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:03.179 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:03.184
2025-07-01 05:46:03.190 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:03.199 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:03.205 alo = 133, ahi = 1101
2025-07-01 05:46:03.213 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:03.218 blo = 133, bhi = 1101
2025-07-01 05:46:03.228
2025-07-01 05:46:03.238 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:03.245 r"""
2025-07-01 05:46:03.254 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:03.267 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:03.277 synch point, and intraline difference marking is done on the
2025-07-01 05:46:03.291 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:03.303
2025-07-01 05:46:03.312 Example:
2025-07-01 05:46:03.321
2025-07-01 05:46:03.327 >>> d = Differ()
2025-07-01 05:46:03.334 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:03.341 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:03.347 >>> print(''.join(results), end="")
2025-07-01 05:46:03.355 - abcDefghiJkl
2025-07-01 05:46:03.369 + abcdefGhijkl
2025-07-01 05:46:03.383 """
2025-07-01 05:46:03.393
2025-07-01 05:46:03.400 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:03.407 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:03.413 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:03.420 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:03.426 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:03.432
2025-07-01 05:46:03.437 # search for the pair that matches best without being identical
2025-07-01 05:46:03.442 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:03.446 # on junk -- unless we have to)
2025-07-01 05:46:03.451 for j in range(blo, bhi):
2025-07-01 05:46:03.460 bj = b[j]
2025-07-01 05:46:03.471 cruncher.set_seq2(bj)
2025-07-01 05:46:03.480 for i in range(alo, ahi):
2025-07-01 05:46:03.488 ai = a[i]
2025-07-01 05:46:03.501 if ai == bj:
2025-07-01 05:46:03.509 if eqi is None:
2025-07-01 05:46:03.516 eqi, eqj = i, j
2025-07-01 05:46:03.523 continue
2025-07-01 05:46:03.529 cruncher.set_seq1(ai)
2025-07-01 05:46:03.541 # computing similarity is expensive, so use the quick
2025-07-01 05:46:03.549 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:03.559 # compares by a factor of 3.
2025-07-01 05:46:03.570 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:03.580 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:03.590 # of the computation is cached by cruncher
2025-07-01 05:46:03.599 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:03.607 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:03.619 cruncher.ratio() > best_ratio:
2025-07-01 05:46:03.628 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:03.636 if best_ratio < cutoff:
2025-07-01 05:46:03.644 # no non-identical "pretty close" pair
2025-07-01 05:46:03.656 if eqi is None:
2025-07-01 05:46:03.667 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:03.675 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:03.681 return
2025-07-01 05:46:03.687 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:03.692 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:03.702 else:
2025-07-01 05:46:03.710 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:03.718 eqi = None
2025-07-01 05:46:03.727
2025-07-01 05:46:03.735 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:03.744 # identical
2025-07-01 05:46:03.755
2025-07-01 05:46:03.763 # pump out diffs from before the synch point
2025-07-01 05:46:03.771 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:03.779
2025-07-01 05:46:03.788 # do intraline marking on the synch pair
2025-07-01 05:46:03.796 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:03.804 if eqi is None:
2025-07-01 05:46:03.810 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:03.819 atags = btags = ""
2025-07-01 05:46:03.829 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:03.836 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:03.842 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:03.850 if tag == 'replace':
2025-07-01 05:46:03.861 atags += '^' * la
2025-07-01 05:46:03.871 btags += '^' * lb
2025-07-01 05:46:03.882 elif tag == 'delete':
2025-07-01 05:46:03.891 atags += '-' * la
2025-07-01 05:46:03.899 elif tag == 'insert':
2025-07-01 05:46:03.906 btags += '+' * lb
2025-07-01 05:46:03.911 elif tag == 'equal':
2025-07-01 05:46:03.921 atags += ' ' * la
2025-07-01 05:46:03.932 btags += ' ' * lb
2025-07-01 05:46:03.943 else:
2025-07-01 05:46:03.955 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:03.968 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:03.979 else:
2025-07-01 05:46:03.991 # the synch pair is identical
2025-07-01 05:46:04.003 yield ' ' + aelt
2025-07-01 05:46:04.012
2025-07-01 05:46:04.020 # pump out diffs from after the synch point
2025-07-01 05:46:04.026 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:04.032
2025-07-01 05:46:04.038 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:04.048 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:04.061
2025-07-01 05:46:04.070 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:04.083 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:04.092 alo = 136, ahi = 1101
2025-07-01 05:46:04.104 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:04.114 blo = 136, bhi = 1101
2025-07-01 05:46:04.124
2025-07-01 05:46:04.133 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:04.140 g = []
2025-07-01 05:46:04.153 if alo < ahi:
2025-07-01 05:46:04.164 if blo < bhi:
2025-07-01 05:46:04.177 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:04.189 else:
2025-07-01 05:46:04.200 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:04.207 elif blo < bhi:
2025-07-01 05:46:04.213 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:04.220
2025-07-01 05:46:04.227 > yield from g
2025-07-01 05:46:04.234
2025-07-01 05:46:04.246 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:04.255 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:04.263
2025-07-01 05:46:04.269 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:04.278 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:04.284 alo = 136, ahi = 1101
2025-07-01 05:46:04.295 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:04.304 blo = 136, bhi = 1101
2025-07-01 05:46:04.311
2025-07-01 05:46:04.318 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:04.323 r"""
2025-07-01 05:46:04.329 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:04.334 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:04.344 synch point, and intraline difference marking is done on the
2025-07-01 05:46:04.349 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:04.355
2025-07-01 05:46:04.361 Example:
2025-07-01 05:46:04.367
2025-07-01 05:46:04.375 >>> d = Differ()
2025-07-01 05:46:04.384 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:04.394 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:04.404 >>> print(''.join(results), end="")
2025-07-01 05:46:04.412 - abcDefghiJkl
2025-07-01 05:46:04.423 + abcdefGhijkl
2025-07-01 05:46:04.435 """
2025-07-01 05:46:04.442
2025-07-01 05:46:04.454 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:04.464 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:04.471 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:04.477 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:04.483 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:04.489
2025-07-01 05:46:04.495 # search for the pair that matches best without being identical
2025-07-01 05:46:04.501 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:04.507 # on junk -- unless we have to)
2025-07-01 05:46:04.513 for j in range(blo, bhi):
2025-07-01 05:46:04.519 bj = b[j]
2025-07-01 05:46:04.527 cruncher.set_seq2(bj)
2025-07-01 05:46:04.536 for i in range(alo, ahi):
2025-07-01 05:46:04.546 ai = a[i]
2025-07-01 05:46:04.557 if ai == bj:
2025-07-01 05:46:04.569 if eqi is None:
2025-07-01 05:46:04.577 eqi, eqj = i, j
2025-07-01 05:46:04.583 continue
2025-07-01 05:46:04.589 cruncher.set_seq1(ai)
2025-07-01 05:46:04.597 # computing similarity is expensive, so use the quick
2025-07-01 05:46:04.603 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:04.609 # compares by a factor of 3.
2025-07-01 05:46:04.622 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:04.635 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:04.647 # of the computation is cached by cruncher
2025-07-01 05:46:04.656 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:04.663 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:04.671 cruncher.ratio() > best_ratio:
2025-07-01 05:46:04.681 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:04.691 if best_ratio < cutoff:
2025-07-01 05:46:04.702 # no non-identical "pretty close" pair
2025-07-01 05:46:04.712 if eqi is None:
2025-07-01 05:46:04.721 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:04.732 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:04.743 return
2025-07-01 05:46:04.752 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:04.760 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:04.766 else:
2025-07-01 05:46:04.773 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:04.779 eqi = None
2025-07-01 05:46:04.784
2025-07-01 05:46:04.790 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:04.803 # identical
2025-07-01 05:46:04.811
2025-07-01 05:46:04.823 # pump out diffs from before the synch point
2025-07-01 05:46:04.832 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:04.839
2025-07-01 05:46:04.847 # do intraline marking on the synch pair
2025-07-01 05:46:04.858 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:04.866 if eqi is None:
2025-07-01 05:46:04.874 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:04.883 atags = btags = ""
2025-07-01 05:46:04.890 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:04.897 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:04.903 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:04.910 if tag == 'replace':
2025-07-01 05:46:04.916 atags += '^' * la
2025-07-01 05:46:04.928 btags += '^' * lb
2025-07-01 05:46:04.937 elif tag == 'delete':
2025-07-01 05:46:04.944 atags += '-' * la
2025-07-01 05:46:04.957 elif tag == 'insert':
2025-07-01 05:46:04.968 btags += '+' * lb
2025-07-01 05:46:04.979 elif tag == 'equal':
2025-07-01 05:46:04.991 atags += ' ' * la
2025-07-01 05:46:05.003 btags += ' ' * lb
2025-07-01 05:46:05.013 else:
2025-07-01 05:46:05.024 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:05.032 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:05.038 else:
2025-07-01 05:46:05.044 # the synch pair is identical
2025-07-01 05:46:05.050 yield ' ' + aelt
2025-07-01 05:46:05.059
2025-07-01 05:46:05.069 # pump out diffs from after the synch point
2025-07-01 05:46:05.077 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:05.083
2025-07-01 05:46:05.088 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:05.094 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:05.100
2025-07-01 05:46:05.107 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:05.118 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:05.126 alo = 137, ahi = 1101
2025-07-01 05:46:05.136 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:05.143 blo = 137, bhi = 1101
2025-07-01 05:46:05.151
2025-07-01 05:46:05.158 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:05.163 g = []
2025-07-01 05:46:05.171 if alo < ahi:
2025-07-01 05:46:05.182 if blo < bhi:
2025-07-01 05:46:05.189 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:05.196 else:
2025-07-01 05:46:05.204 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:05.215 elif blo < bhi:
2025-07-01 05:46:05.228 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:05.239
2025-07-01 05:46:05.248 > yield from g
2025-07-01 05:46:05.255
2025-07-01 05:46:05.266 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:05.274 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:05.280
2025-07-01 05:46:05.285 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:05.291 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:05.295 alo = 137, ahi = 1101
2025-07-01 05:46:05.301 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:05.305 blo = 137, bhi = 1101
2025-07-01 05:46:05.310
2025-07-01 05:46:05.315 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:05.320 r"""
2025-07-01 05:46:05.326 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:05.332 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:05.339 synch point, and intraline difference marking is done on the
2025-07-01 05:46:05.345 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:05.351
2025-07-01 05:46:05.357 Example:
2025-07-01 05:46:05.363
2025-07-01 05:46:05.370 >>> d = Differ()
2025-07-01 05:46:05.377 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:05.388 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:05.400 >>> print(''.join(results), end="")
2025-07-01 05:46:05.411 - abcDefghiJkl
2025-07-01 05:46:05.428 + abcdefGhijkl
2025-07-01 05:46:05.441 """
2025-07-01 05:46:05.448
2025-07-01 05:46:05.455 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:05.462 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:05.471 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:05.483 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:05.491 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:05.499
2025-07-01 05:46:05.506 # search for the pair that matches best without being identical
2025-07-01 05:46:05.513 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:05.520 # on junk -- unless we have to)
2025-07-01 05:46:05.526 for j in range(blo, bhi):
2025-07-01 05:46:05.531 bj = b[j]
2025-07-01 05:46:05.537 cruncher.set_seq2(bj)
2025-07-01 05:46:05.542 for i in range(alo, ahi):
2025-07-01 05:46:05.549 ai = a[i]
2025-07-01 05:46:05.554 if ai == bj:
2025-07-01 05:46:05.559 if eqi is None:
2025-07-01 05:46:05.565 eqi, eqj = i, j
2025-07-01 05:46:05.570 continue
2025-07-01 05:46:05.579 cruncher.set_seq1(ai)
2025-07-01 05:46:05.592 # computing similarity is expensive, so use the quick
2025-07-01 05:46:05.601 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:05.609 # compares by a factor of 3.
2025-07-01 05:46:05.616 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:05.622 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:05.629 # of the computation is cached by cruncher
2025-07-01 05:46:05.635 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:05.644 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:05.657 cruncher.ratio() > best_ratio:
2025-07-01 05:46:05.667 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:05.678 if best_ratio < cutoff:
2025-07-01 05:46:05.690 # no non-identical "pretty close" pair
2025-07-01 05:46:05.702 if eqi is None:
2025-07-01 05:46:05.712 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:05.721 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:05.729 return
2025-07-01 05:46:05.736 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:05.741 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:05.747 else:
2025-07-01 05:46:05.755 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:05.766 eqi = None
2025-07-01 05:46:05.775
2025-07-01 05:46:05.782 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:05.792 # identical
2025-07-01 05:46:05.803
2025-07-01 05:46:05.812 # pump out diffs from before the synch point
2025-07-01 05:46:05.823 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:05.835
2025-07-01 05:46:05.847 # do intraline marking on the synch pair
2025-07-01 05:46:05.857 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:05.868 if eqi is None:
2025-07-01 05:46:05.879 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:05.891 atags = btags = ""
2025-07-01 05:46:05.903 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:05.912 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:05.919 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:05.926 if tag == 'replace':
2025-07-01 05:46:05.932 atags += '^' * la
2025-07-01 05:46:05.938 btags += '^' * lb
2025-07-01 05:46:05.944 elif tag == 'delete':
2025-07-01 05:46:05.950 atags += '-' * la
2025-07-01 05:46:05.956 elif tag == 'insert':
2025-07-01 05:46:05.963 btags += '+' * lb
2025-07-01 05:46:05.971 elif tag == 'equal':
2025-07-01 05:46:05.979 atags += ' ' * la
2025-07-01 05:46:05.986 btags += ' ' * lb
2025-07-01 05:46:05.992 else:
2025-07-01 05:46:05.998 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:06.004 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:06.010 else:
2025-07-01 05:46:06.017 # the synch pair is identical
2025-07-01 05:46:06.023 yield ' ' + aelt
2025-07-01 05:46:06.029
2025-07-01 05:46:06.034 # pump out diffs from after the synch point
2025-07-01 05:46:06.040 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:06.045
2025-07-01 05:46:06.050 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:06.055 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:06.063
2025-07-01 05:46:06.075 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:06.083 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:06.091 alo = 138, ahi = 1101
2025-07-01 05:46:06.098 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:06.104 blo = 138, bhi = 1101
2025-07-01 05:46:06.111
2025-07-01 05:46:06.119 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:06.126 g = []
2025-07-01 05:46:06.132 if alo < ahi:
2025-07-01 05:46:06.138 if blo < bhi:
2025-07-01 05:46:06.145 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:06.150 else:
2025-07-01 05:46:06.157 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:06.165 elif blo < bhi:
2025-07-01 05:46:06.171 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:06.179
2025-07-01 05:46:06.188 > yield from g
2025-07-01 05:46:06.195
2025-07-01 05:46:06.201 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:06.207 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:06.213
2025-07-01 05:46:06.218 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:06.225 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:06.231 alo = 138, ahi = 1101
2025-07-01 05:46:06.238 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:06.243 blo = 138, bhi = 1101
2025-07-01 05:46:06.249
2025-07-01 05:46:06.254 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:06.260 r"""
2025-07-01 05:46:06.266 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:06.273 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:06.279 synch point, and intraline difference marking is done on the
2025-07-01 05:46:06.286 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:06.292
2025-07-01 05:46:06.299 Example:
2025-07-01 05:46:06.306
2025-07-01 05:46:06.315 >>> d = Differ()
2025-07-01 05:46:06.326 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:06.334 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:06.340 >>> print(''.join(results), end="")
2025-07-01 05:46:06.346 - abcDefghiJkl
2025-07-01 05:46:06.358 + abcdefGhijkl
2025-07-01 05:46:06.371 """
2025-07-01 05:46:06.377
2025-07-01 05:46:06.382 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:06.388 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:06.394 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:06.399 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:06.405 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:06.411
2025-07-01 05:46:06.418 # search for the pair that matches best without being identical
2025-07-01 05:46:06.424 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:06.430 # on junk -- unless we have to)
2025-07-01 05:46:06.435 for j in range(blo, bhi):
2025-07-01 05:46:06.441 bj = b[j]
2025-07-01 05:46:06.446 cruncher.set_seq2(bj)
2025-07-01 05:46:06.452 for i in range(alo, ahi):
2025-07-01 05:46:06.457 ai = a[i]
2025-07-01 05:46:06.463 if ai == bj:
2025-07-01 05:46:06.468 if eqi is None:
2025-07-01 05:46:06.474 eqi, eqj = i, j
2025-07-01 05:46:06.480 continue
2025-07-01 05:46:06.486 cruncher.set_seq1(ai)
2025-07-01 05:46:06.490 # computing similarity is expensive, so use the quick
2025-07-01 05:46:06.498 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:06.510 # compares by a factor of 3.
2025-07-01 05:46:06.521 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:06.529 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:06.536 # of the computation is cached by cruncher
2025-07-01 05:46:06.542 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:06.550 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:06.560 cruncher.ratio() > best_ratio:
2025-07-01 05:46:06.568 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:06.574 if best_ratio < cutoff:
2025-07-01 05:46:06.582 # no non-identical "pretty close" pair
2025-07-01 05:46:06.594 if eqi is None:
2025-07-01 05:46:06.607 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:06.617 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:06.625 return
2025-07-01 05:46:06.631 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:06.642 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:06.648 else:
2025-07-01 05:46:06.656 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:06.661 eqi = None
2025-07-01 05:46:06.667
2025-07-01 05:46:06.673 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:06.686 # identical
2025-07-01 05:46:06.694
2025-07-01 05:46:06.701 # pump out diffs from before the synch point
2025-07-01 05:46:06.709 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:06.715
2025-07-01 05:46:06.721 # do intraline marking on the synch pair
2025-07-01 05:46:06.725 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:06.739 if eqi is None:
2025-07-01 05:46:06.748 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:06.755 atags = btags = ""
2025-07-01 05:46:06.761 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:06.773 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:06.784 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:06.795 if tag == 'replace':
2025-07-01 05:46:06.804 atags += '^' * la
2025-07-01 05:46:06.812 btags += '^' * lb
2025-07-01 05:46:06.819 elif tag == 'delete':
2025-07-01 05:46:06.826 atags += '-' * la
2025-07-01 05:46:06.836 elif tag == 'insert':
2025-07-01 05:46:06.845 btags += '+' * lb
2025-07-01 05:46:06.854 elif tag == 'equal':
2025-07-01 05:46:06.865 atags += ' ' * la
2025-07-01 05:46:06.876 btags += ' ' * lb
2025-07-01 05:46:06.885 else:
2025-07-01 05:46:06.893 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:06.900 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:06.909 else:
2025-07-01 05:46:06.922 # the synch pair is identical
2025-07-01 05:46:06.930 yield ' ' + aelt
2025-07-01 05:46:06.936
2025-07-01 05:46:06.941 # pump out diffs from after the synch point
2025-07-01 05:46:06.946 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:06.950
2025-07-01 05:46:06.960 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:06.968 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:06.974
2025-07-01 05:46:06.981 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:06.987 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:06.992 alo = 139, ahi = 1101
2025-07-01 05:46:07.002 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:07.009 blo = 139, bhi = 1101
2025-07-01 05:46:07.015
2025-07-01 05:46:07.021 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:07.026 g = []
2025-07-01 05:46:07.037 if alo < ahi:
2025-07-01 05:46:07.047 if blo < bhi:
2025-07-01 05:46:07.055 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:07.060 else:
2025-07-01 05:46:07.065 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:07.069 elif blo < bhi:
2025-07-01 05:46:07.074 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:07.078
2025-07-01 05:46:07.082 > yield from g
2025-07-01 05:46:07.087
2025-07-01 05:46:07.094 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:07.101 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:07.107
2025-07-01 05:46:07.112 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:07.117 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:07.121 alo = 139, ahi = 1101
2025-07-01 05:46:07.126 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:07.130 blo = 139, bhi = 1101
2025-07-01 05:46:07.140
2025-07-01 05:46:07.150 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:07.158 r"""
2025-07-01 05:46:07.165 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:07.177 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:07.186 synch point, and intraline difference marking is done on the
2025-07-01 05:46:07.195 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:07.202
2025-07-01 05:46:07.211 Example:
2025-07-01 05:46:07.220
2025-07-01 05:46:07.230 >>> d = Differ()
2025-07-01 05:46:07.241 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:07.252 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:07.263 >>> print(''.join(results), end="")
2025-07-01 05:46:07.271 - abcDefghiJkl
2025-07-01 05:46:07.291 + abcdefGhijkl
2025-07-01 05:46:07.314 """
2025-07-01 05:46:07.322
2025-07-01 05:46:07.332 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:07.342 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:07.353 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:07.363 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:07.371 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:07.379
2025-07-01 05:46:07.387 # search for the pair that matches best without being identical
2025-07-01 05:46:07.393 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:07.399 # on junk -- unless we have to)
2025-07-01 05:46:07.405 for j in range(blo, bhi):
2025-07-01 05:46:07.411 bj = b[j]
2025-07-01 05:46:07.422 cruncher.set_seq2(bj)
2025-07-01 05:46:07.431 for i in range(alo, ahi):
2025-07-01 05:46:07.438 ai = a[i]
2025-07-01 05:46:07.449 if ai == bj:
2025-07-01 05:46:07.458 if eqi is None:
2025-07-01 05:46:07.465 eqi, eqj = i, j
2025-07-01 05:46:07.471 continue
2025-07-01 05:46:07.477 cruncher.set_seq1(ai)
2025-07-01 05:46:07.483 # computing similarity is expensive, so use the quick
2025-07-01 05:46:07.490 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:07.496 # compares by a factor of 3.
2025-07-01 05:46:07.501 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:07.511 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:07.523 # of the computation is cached by cruncher
2025-07-01 05:46:07.534 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:07.547 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:07.558 cruncher.ratio() > best_ratio:
2025-07-01 05:46:07.567 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:07.574 if best_ratio < cutoff:
2025-07-01 05:46:07.581 # no non-identical "pretty close" pair
2025-07-01 05:46:07.589 if eqi is None:
2025-07-01 05:46:07.598 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:07.610 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:07.617 return
2025-07-01 05:46:07.624 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:07.630 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:07.640 else:
2025-07-01 05:46:07.648 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:07.656 eqi = None
2025-07-01 05:46:07.663
2025-07-01 05:46:07.669 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:07.675 # identical
2025-07-01 05:46:07.681
2025-07-01 05:46:07.686 # pump out diffs from before the synch point
2025-07-01 05:46:07.692 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:07.697
2025-07-01 05:46:07.703 # do intraline marking on the synch pair
2025-07-01 05:46:07.711 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:07.717 if eqi is None:
2025-07-01 05:46:07.723 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:07.729 atags = btags = ""
2025-07-01 05:46:07.735 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:07.743 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:07.751 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:07.763 if tag == 'replace':
2025-07-01 05:46:07.771 atags += '^' * la
2025-07-01 05:46:07.777 btags += '^' * lb
2025-07-01 05:46:07.783 elif tag == 'delete':
2025-07-01 05:46:07.788 atags += '-' * la
2025-07-01 05:46:07.794 elif tag == 'insert':
2025-07-01 05:46:07.801 btags += '+' * lb
2025-07-01 05:46:07.809 elif tag == 'equal':
2025-07-01 05:46:07.816 atags += ' ' * la
2025-07-01 05:46:07.824 btags += ' ' * lb
2025-07-01 05:46:07.832 else:
2025-07-01 05:46:07.839 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:07.846 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:07.856 else:
2025-07-01 05:46:07.864 # the synch pair is identical
2025-07-01 05:46:07.872 yield ' ' + aelt
2025-07-01 05:46:07.878
2025-07-01 05:46:07.884 # pump out diffs from after the synch point
2025-07-01 05:46:07.891 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:07.897
2025-07-01 05:46:07.903 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:07.910 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:07.917
2025-07-01 05:46:07.924 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:07.932 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:07.938 alo = 140, ahi = 1101
2025-07-01 05:46:07.946 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:07.952 blo = 140, bhi = 1101
2025-07-01 05:46:07.958
2025-07-01 05:46:07.966 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:07.973 g = []
2025-07-01 05:46:07.979 if alo < ahi:
2025-07-01 05:46:07.986 if blo < bhi:
2025-07-01 05:46:07.993 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:08.000 else:
2025-07-01 05:46:08.006 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:08.012 elif blo < bhi:
2025-07-01 05:46:08.018 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:08.024
2025-07-01 05:46:08.030 > yield from g
2025-07-01 05:46:08.037
2025-07-01 05:46:08.043 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:08.051 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:08.058
2025-07-01 05:46:08.067 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:08.073 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:08.079 alo = 140, ahi = 1101
2025-07-01 05:46:08.088 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:08.094 blo = 140, bhi = 1101
2025-07-01 05:46:08.100
2025-07-01 05:46:08.106 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:08.112 r"""
2025-07-01 05:46:08.118 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:08.124 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:08.131 synch point, and intraline difference marking is done on the
2025-07-01 05:46:08.142 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:08.154
2025-07-01 05:46:08.164 Example:
2025-07-01 05:46:08.173
2025-07-01 05:46:08.187 >>> d = Differ()
2025-07-01 05:46:08.199 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:08.205 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:08.210 >>> print(''.join(results), end="")
2025-07-01 05:46:08.216 - abcDefghiJkl
2025-07-01 05:46:08.226 + abcdefGhijkl
2025-07-01 05:46:08.237 """
2025-07-01 05:46:08.242
2025-07-01 05:46:08.248 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:08.254 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:08.267 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:08.275 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:08.281 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:08.289
2025-07-01 05:46:08.298 # search for the pair that matches best without being identical
2025-07-01 05:46:08.309 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:08.318 # on junk -- unless we have to)
2025-07-01 05:46:08.324 for j in range(blo, bhi):
2025-07-01 05:46:08.332 bj = b[j]
2025-07-01 05:46:08.339 cruncher.set_seq2(bj)
2025-07-01 05:46:08.345 for i in range(alo, ahi):
2025-07-01 05:46:08.351 ai = a[i]
2025-07-01 05:46:08.358 if ai == bj:
2025-07-01 05:46:08.366 if eqi is None:
2025-07-01 05:46:08.373 eqi, eqj = i, j
2025-07-01 05:46:08.381 continue
2025-07-01 05:46:08.387 cruncher.set_seq1(ai)
2025-07-01 05:46:08.393 # computing similarity is expensive, so use the quick
2025-07-01 05:46:08.398 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:08.403 # compares by a factor of 3.
2025-07-01 05:46:08.407 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:08.412 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:08.417 # of the computation is cached by cruncher
2025-07-01 05:46:08.424 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:08.430 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:08.437 cruncher.ratio() > best_ratio:
2025-07-01 05:46:08.443 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:08.449 if best_ratio < cutoff:
2025-07-01 05:46:08.454 # no non-identical "pretty close" pair
2025-07-01 05:46:08.460 if eqi is None:
2025-07-01 05:46:08.466 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:08.474 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:08.484 return
2025-07-01 05:46:08.498 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:08.508 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:08.515 else:
2025-07-01 05:46:08.524 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:08.531 eqi = None
2025-07-01 05:46:08.538
2025-07-01 05:46:08.549 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:08.557 # identical
2025-07-01 05:46:08.569
2025-07-01 05:46:08.579 # pump out diffs from before the synch point
2025-07-01 05:46:08.588 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:08.595
2025-07-01 05:46:08.606 # do intraline marking on the synch pair
2025-07-01 05:46:08.618 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:08.628 if eqi is None:
2025-07-01 05:46:08.636 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:08.643 atags = btags = ""
2025-07-01 05:46:08.655 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:08.663 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:08.669 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:08.674 if tag == 'replace':
2025-07-01 05:46:08.679 atags += '^' * la
2025-07-01 05:46:08.684 btags += '^' * lb
2025-07-01 05:46:08.688 elif tag == 'delete':
2025-07-01 05:46:08.693 atags += '-' * la
2025-07-01 05:46:08.699 elif tag == 'insert':
2025-07-01 05:46:08.704 btags += '+' * lb
2025-07-01 05:46:08.710 elif tag == 'equal':
2025-07-01 05:46:08.715 atags += ' ' * la
2025-07-01 05:46:08.721 btags += ' ' * lb
2025-07-01 05:46:08.727 else:
2025-07-01 05:46:08.735 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:08.746 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:08.756 else:
2025-07-01 05:46:08.770 # the synch pair is identical
2025-07-01 05:46:08.782 yield ' ' + aelt
2025-07-01 05:46:08.790
2025-07-01 05:46:08.798 # pump out diffs from after the synch point
2025-07-01 05:46:08.810 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:08.820
2025-07-01 05:46:08.828 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:08.836 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:08.843
2025-07-01 05:46:08.853 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:08.862 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:08.869 alo = 141, ahi = 1101
2025-07-01 05:46:08.875 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:08.880 blo = 141, bhi = 1101
2025-07-01 05:46:08.884
2025-07-01 05:46:08.889 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:08.893 g = []
2025-07-01 05:46:08.898 if alo < ahi:
2025-07-01 05:46:08.902 if blo < bhi:
2025-07-01 05:46:08.906 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:08.910 else:
2025-07-01 05:46:08.915 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:08.919 elif blo < bhi:
2025-07-01 05:46:08.923 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:08.927
2025-07-01 05:46:08.932 > yield from g
2025-07-01 05:46:08.936
2025-07-01 05:46:08.941 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:08.945 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:08.949
2025-07-01 05:46:08.954 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:08.959 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:08.963 alo = 141, ahi = 1101
2025-07-01 05:46:08.968 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:08.972 blo = 141, bhi = 1101
2025-07-01 05:46:08.976
2025-07-01 05:46:08.981 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:08.991 r"""
2025-07-01 05:46:09.003 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:09.010 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:09.017 synch point, and intraline difference marking is done on the
2025-07-01 05:46:09.022 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:09.030
2025-07-01 05:46:09.039 Example:
2025-07-01 05:46:09.050
2025-07-01 05:46:09.058 >>> d = Differ()
2025-07-01 05:46:09.064 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:09.069 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:09.075 >>> print(''.join(results), end="")
2025-07-01 05:46:09.079 - abcDefghiJkl
2025-07-01 05:46:09.088 + abcdefGhijkl
2025-07-01 05:46:09.096 """
2025-07-01 05:46:09.102
2025-07-01 05:46:09.112 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:09.122 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:09.129 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:09.135 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:09.141 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:09.150
2025-07-01 05:46:09.162 # search for the pair that matches best without being identical
2025-07-01 05:46:09.175 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:09.186 # on junk -- unless we have to)
2025-07-01 05:46:09.197 for j in range(blo, bhi):
2025-07-01 05:46:09.206 bj = b[j]
2025-07-01 05:46:09.219 cruncher.set_seq2(bj)
2025-07-01 05:46:09.228 for i in range(alo, ahi):
2025-07-01 05:46:09.235 ai = a[i]
2025-07-01 05:46:09.242 if ai == bj:
2025-07-01 05:46:09.252 if eqi is None:
2025-07-01 05:46:09.264 eqi, eqj = i, j
2025-07-01 05:46:09.276 continue
2025-07-01 05:46:09.284 cruncher.set_seq1(ai)
2025-07-01 05:46:09.293 # computing similarity is expensive, so use the quick
2025-07-01 05:46:09.303 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:09.314 # compares by a factor of 3.
2025-07-01 05:46:09.323 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:09.331 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:09.338 # of the computation is cached by cruncher
2025-07-01 05:46:09.345 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:09.352 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:09.357 cruncher.ratio() > best_ratio:
2025-07-01 05:46:09.369 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:09.378 if best_ratio < cutoff:
2025-07-01 05:46:09.386 # no non-identical "pretty close" pair
2025-07-01 05:46:09.394 if eqi is None:
2025-07-01 05:46:09.405 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:09.416 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:09.424 return
2025-07-01 05:46:09.432 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:09.441 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:09.453 else:
2025-07-01 05:46:09.463 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:09.470 eqi = None
2025-07-01 05:46:09.477
2025-07-01 05:46:09.484 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:09.489 # identical
2025-07-01 05:46:09.495
2025-07-01 05:46:09.500 # pump out diffs from before the synch point
2025-07-01 05:46:09.506 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:09.516
2025-07-01 05:46:09.527 # do intraline marking on the synch pair
2025-07-01 05:46:09.535 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:09.547 if eqi is None:
2025-07-01 05:46:09.558 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:09.567 atags = btags = ""
2025-07-01 05:46:09.579 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:09.592 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:09.602 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:09.609 if tag == 'replace':
2025-07-01 05:46:09.615 atags += '^' * la
2025-07-01 05:46:09.622 btags += '^' * lb
2025-07-01 05:46:09.632 elif tag == 'delete':
2025-07-01 05:46:09.640 atags += '-' * la
2025-07-01 05:46:09.648 elif tag == 'insert':
2025-07-01 05:46:09.655 btags += '+' * lb
2025-07-01 05:46:09.661 elif tag == 'equal':
2025-07-01 05:46:09.666 atags += ' ' * la
2025-07-01 05:46:09.671 btags += ' ' * lb
2025-07-01 05:46:09.678 else:
2025-07-01 05:46:09.684 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:09.690 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:09.696 else:
2025-07-01 05:46:09.703 # the synch pair is identical
2025-07-01 05:46:09.711 yield ' ' + aelt
2025-07-01 05:46:09.721
2025-07-01 05:46:09.728 # pump out diffs from after the synch point
2025-07-01 05:46:09.735 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:09.746
2025-07-01 05:46:09.754 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:09.761 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:09.767
2025-07-01 05:46:09.772 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:09.778 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:09.783 alo = 142, ahi = 1101
2025-07-01 05:46:09.790 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:09.796 blo = 142, bhi = 1101
2025-07-01 05:46:09.803
2025-07-01 05:46:09.810 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:09.824 g = []
2025-07-01 05:46:09.833 if alo < ahi:
2025-07-01 05:46:09.839 if blo < bhi:
2025-07-01 05:46:09.846 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:09.856 else:
2025-07-01 05:46:09.864 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:09.871 elif blo < bhi:
2025-07-01 05:46:09.877 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:09.888
2025-07-01 05:46:09.897 > yield from g
2025-07-01 05:46:09.905
2025-07-01 05:46:09.911 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:09.918 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:09.924
2025-07-01 05:46:09.931 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:09.941 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:09.949 alo = 142, ahi = 1101
2025-07-01 05:46:09.955 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:09.962 blo = 142, bhi = 1101
2025-07-01 05:46:09.967
2025-07-01 05:46:09.974 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:09.985 r"""
2025-07-01 05:46:09.993 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:10.001 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:10.008 synch point, and intraline difference marking is done on the
2025-07-01 05:46:10.015 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:10.021
2025-07-01 05:46:10.026 Example:
2025-07-01 05:46:10.037
2025-07-01 05:46:10.046 >>> d = Differ()
2025-07-01 05:46:10.058 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:10.070 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:10.081 >>> print(''.join(results), end="")
2025-07-01 05:46:10.089 - abcDefghiJkl
2025-07-01 05:46:10.102 + abcdefGhijkl
2025-07-01 05:46:10.116 """
2025-07-01 05:46:10.126
2025-07-01 05:46:10.137 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:10.147 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:10.159 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:10.171 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:10.180 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:10.188
2025-07-01 05:46:10.195 # search for the pair that matches best without being identical
2025-07-01 05:46:10.202 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:10.207 # on junk -- unless we have to)
2025-07-01 05:46:10.213 for j in range(blo, bhi):
2025-07-01 05:46:10.219 bj = b[j]
2025-07-01 05:46:10.225 cruncher.set_seq2(bj)
2025-07-01 05:46:10.231 for i in range(alo, ahi):
2025-07-01 05:46:10.236 ai = a[i]
2025-07-01 05:46:10.241 if ai == bj:
2025-07-01 05:46:10.255 if eqi is None:
2025-07-01 05:46:10.264 eqi, eqj = i, j
2025-07-01 05:46:10.271 continue
2025-07-01 05:46:10.283 cruncher.set_seq1(ai)
2025-07-01 05:46:10.292 # computing similarity is expensive, so use the quick
2025-07-01 05:46:10.299 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:10.305 # compares by a factor of 3.
2025-07-01 05:46:10.311 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:10.316 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:10.322 # of the computation is cached by cruncher
2025-07-01 05:46:10.331 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:10.345 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:10.353 cruncher.ratio() > best_ratio:
2025-07-01 05:46:10.361 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:10.367 if best_ratio < cutoff:
2025-07-01 05:46:10.374 # no non-identical "pretty close" pair
2025-07-01 05:46:10.381 if eqi is None:
2025-07-01 05:46:10.388 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:10.397 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:10.404 return
2025-07-01 05:46:10.410 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:10.416 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:10.421 else:
2025-07-01 05:46:10.426 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:10.431 eqi = None
2025-07-01 05:46:10.437
2025-07-01 05:46:10.445 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:10.451 # identical
2025-07-01 05:46:10.458
2025-07-01 05:46:10.466 # pump out diffs from before the synch point
2025-07-01 05:46:10.477 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:10.485
2025-07-01 05:46:10.491 # do intraline marking on the synch pair
2025-07-01 05:46:10.497 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:10.502 if eqi is None:
2025-07-01 05:46:10.508 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:10.514 atags = btags = ""
2025-07-01 05:46:10.525 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:10.535 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:10.547 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:10.555 if tag == 'replace':
2025-07-01 05:46:10.562 atags += '^' * la
2025-07-01 05:46:10.569 btags += '^' * lb
2025-07-01 05:46:10.576 elif tag == 'delete':
2025-07-01 05:46:10.583 atags += '-' * la
2025-07-01 05:46:10.588 elif tag == 'insert':
2025-07-01 05:46:10.594 btags += '+' * lb
2025-07-01 05:46:10.599 elif tag == 'equal':
2025-07-01 05:46:10.604 atags += ' ' * la
2025-07-01 05:46:10.610 btags += ' ' * lb
2025-07-01 05:46:10.615 else:
2025-07-01 05:46:10.623 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:10.632 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:10.638 else:
2025-07-01 05:46:10.644 # the synch pair is identical
2025-07-01 05:46:10.653 yield ' ' + aelt
2025-07-01 05:46:10.660
2025-07-01 05:46:10.667 # pump out diffs from after the synch point
2025-07-01 05:46:10.673 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:10.679
2025-07-01 05:46:10.687 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:10.698 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:10.706
2025-07-01 05:46:10.712 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:10.718 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:10.724 alo = 143, ahi = 1101
2025-07-01 05:46:10.730 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:10.735 blo = 143, bhi = 1101
2025-07-01 05:46:10.744
2025-07-01 05:46:10.755 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:10.767 g = []
2025-07-01 05:46:10.777 if alo < ahi:
2025-07-01 05:46:10.789 if blo < bhi:
2025-07-01 05:46:10.798 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:10.806 else:
2025-07-01 05:46:10.817 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:10.826 elif blo < bhi:
2025-07-01 05:46:10.836 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:10.847
2025-07-01 05:46:10.855 > yield from g
2025-07-01 05:46:10.863
2025-07-01 05:46:10.872 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:10.881 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:10.887
2025-07-01 05:46:10.901 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:10.913 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:10.921 alo = 143, ahi = 1101
2025-07-01 05:46:10.927 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:10.933 blo = 143, bhi = 1101
2025-07-01 05:46:10.937
2025-07-01 05:46:10.942 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:10.950 r"""
2025-07-01 05:46:10.963 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:10.974 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:10.987 synch point, and intraline difference marking is done on the
2025-07-01 05:46:10.998 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:11.007
2025-07-01 05:46:11.014 Example:
2025-07-01 05:46:11.024
2025-07-01 05:46:11.035 >>> d = Differ()
2025-07-01 05:46:11.048 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:11.057 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:11.067 >>> print(''.join(results), end="")
2025-07-01 05:46:11.077 - abcDefghiJkl
2025-07-01 05:46:11.095 + abcdefGhijkl
2025-07-01 05:46:11.110 """
2025-07-01 05:46:11.120
2025-07-01 05:46:11.128 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:11.136 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:11.142 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:11.148 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:11.154 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:11.166
2025-07-01 05:46:11.175 # search for the pair that matches best without being identical
2025-07-01 05:46:11.183 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:11.192 # on junk -- unless we have to)
2025-07-01 05:46:11.201 for j in range(blo, bhi):
2025-07-01 05:46:11.209 bj = b[j]
2025-07-01 05:46:11.217 cruncher.set_seq2(bj)
2025-07-01 05:46:11.224 for i in range(alo, ahi):
2025-07-01 05:46:11.234 ai = a[i]
2025-07-01 05:46:11.241 if ai == bj:
2025-07-01 05:46:11.247 if eqi is None:
2025-07-01 05:46:11.253 eqi, eqj = i, j
2025-07-01 05:46:11.258 continue
2025-07-01 05:46:11.263 cruncher.set_seq1(ai)
2025-07-01 05:46:11.268 # computing similarity is expensive, so use the quick
2025-07-01 05:46:11.273 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:11.278 # compares by a factor of 3.
2025-07-01 05:46:11.283 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:11.288 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:11.294 # of the computation is cached by cruncher
2025-07-01 05:46:11.301 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:11.312 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:11.321 cruncher.ratio() > best_ratio:
2025-07-01 05:46:11.332 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:11.342 if best_ratio < cutoff:
2025-07-01 05:46:11.351 # no non-identical "pretty close" pair
2025-07-01 05:46:11.361 if eqi is None:
2025-07-01 05:46:11.371 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:11.381 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:11.388 return
2025-07-01 05:46:11.396 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:11.403 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:11.409 else:
2025-07-01 05:46:11.421 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:11.432 eqi = None
2025-07-01 05:46:11.443
2025-07-01 05:46:11.453 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:11.464 # identical
2025-07-01 05:46:11.473
2025-07-01 05:46:11.481 # pump out diffs from before the synch point
2025-07-01 05:46:11.487 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:11.493
2025-07-01 05:46:11.499 # do intraline marking on the synch pair
2025-07-01 05:46:11.506 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:11.514 if eqi is None:
2025-07-01 05:46:11.523 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:11.530 atags = btags = ""
2025-07-01 05:46:11.540 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:11.551 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:11.561 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:11.569 if tag == 'replace':
2025-07-01 05:46:11.576 atags += '^' * la
2025-07-01 05:46:11.582 btags += '^' * lb
2025-07-01 05:46:11.591 elif tag == 'delete':
2025-07-01 05:46:11.603 atags += '-' * la
2025-07-01 05:46:11.614 elif tag == 'insert':
2025-07-01 05:46:11.625 btags += '+' * lb
2025-07-01 05:46:11.638 elif tag == 'equal':
2025-07-01 05:46:11.649 atags += ' ' * la
2025-07-01 05:46:11.660 btags += ' ' * lb
2025-07-01 05:46:11.667 else:
2025-07-01 05:46:11.675 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:11.681 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:11.689 else:
2025-07-01 05:46:11.696 # the synch pair is identical
2025-07-01 05:46:11.702 yield ' ' + aelt
2025-07-01 05:46:11.706
2025-07-01 05:46:11.712 # pump out diffs from after the synch point
2025-07-01 05:46:11.718 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:11.729
2025-07-01 05:46:11.739 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:11.747 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:11.755
2025-07-01 05:46:11.765 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:11.776 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:11.785 alo = 144, ahi = 1101
2025-07-01 05:46:11.795 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:11.802 blo = 144, bhi = 1101
2025-07-01 05:46:11.812
2025-07-01 05:46:11.821 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:11.829 g = []
2025-07-01 05:46:11.836 if alo < ahi:
2025-07-01 05:46:11.842 if blo < bhi:
2025-07-01 05:46:11.848 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:11.856 else:
2025-07-01 05:46:11.868 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:11.878 elif blo < bhi:
2025-07-01 05:46:11.889 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:11.899
2025-07-01 05:46:11.906 > yield from g
2025-07-01 05:46:11.913
2025-07-01 05:46:11.918 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:11.924 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:11.930
2025-07-01 05:46:11.935 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:11.942 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:11.949 alo = 144, ahi = 1101
2025-07-01 05:46:11.956 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:11.963 blo = 144, bhi = 1101
2025-07-01 05:46:11.970
2025-07-01 05:46:11.979 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:11.992 r"""
2025-07-01 05:46:12.003 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:12.013 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:12.026 synch point, and intraline difference marking is done on the
2025-07-01 05:46:12.035 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:12.041
2025-07-01 05:46:12.046 Example:
2025-07-01 05:46:12.057
2025-07-01 05:46:12.066 >>> d = Differ()
2025-07-01 05:46:12.077 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:12.087 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:12.098 >>> print(''.join(results), end="")
2025-07-01 05:46:12.107 - abcDefghiJkl
2025-07-01 05:46:12.128 + abcdefGhijkl
2025-07-01 05:46:12.143 """
2025-07-01 05:46:12.149
2025-07-01 05:46:12.156 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:12.162 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:12.169 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:12.175 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:12.181 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:12.187
2025-07-01 05:46:12.193 # search for the pair that matches best without being identical
2025-07-01 05:46:12.199 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:12.204 # on junk -- unless we have to)
2025-07-01 05:46:12.210 for j in range(blo, bhi):
2025-07-01 05:46:12.215 bj = b[j]
2025-07-01 05:46:12.221 cruncher.set_seq2(bj)
2025-07-01 05:46:12.227 for i in range(alo, ahi):
2025-07-01 05:46:12.232 ai = a[i]
2025-07-01 05:46:12.238 if ai == bj:
2025-07-01 05:46:12.244 if eqi is None:
2025-07-01 05:46:12.249 eqi, eqj = i, j
2025-07-01 05:46:12.255 continue
2025-07-01 05:46:12.262 cruncher.set_seq1(ai)
2025-07-01 05:46:12.268 # computing similarity is expensive, so use the quick
2025-07-01 05:46:12.274 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:12.280 # compares by a factor of 3.
2025-07-01 05:46:12.286 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:12.295 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:12.303 # of the computation is cached by cruncher
2025-07-01 05:46:12.311 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:12.318 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:12.327 cruncher.ratio() > best_ratio:
2025-07-01 05:46:12.339 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:12.345 if best_ratio < cutoff:
2025-07-01 05:46:12.350 # no non-identical "pretty close" pair
2025-07-01 05:46:12.356 if eqi is None:
2025-07-01 05:46:12.363 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:12.369 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:12.375 return
2025-07-01 05:46:12.385 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:12.399 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:12.408 else:
2025-07-01 05:46:12.422 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:12.435 eqi = None
2025-07-01 05:46:12.445
2025-07-01 05:46:12.455 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:12.462 # identical
2025-07-01 05:46:12.469
2025-07-01 05:46:12.475 # pump out diffs from before the synch point
2025-07-01 05:46:12.481 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:12.487
2025-07-01 05:46:12.495 # do intraline marking on the synch pair
2025-07-01 05:46:12.505 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:12.516 if eqi is None:
2025-07-01 05:46:12.526 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:12.537 atags = btags = ""
2025-07-01 05:46:12.548 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:12.556 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:12.564 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:12.571 if tag == 'replace':
2025-07-01 05:46:12.578 atags += '^' * la
2025-07-01 05:46:12.588 btags += '^' * lb
2025-07-01 05:46:12.595 elif tag == 'delete':
2025-07-01 05:46:12.601 atags += '-' * la
2025-07-01 05:46:12.606 elif tag == 'insert':
2025-07-01 05:46:12.614 btags += '+' * lb
2025-07-01 05:46:12.622 elif tag == 'equal':
2025-07-01 05:46:12.630 atags += ' ' * la
2025-07-01 05:46:12.637 btags += ' ' * lb
2025-07-01 05:46:12.642 else:
2025-07-01 05:46:12.649 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:12.657 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:12.664 else:
2025-07-01 05:46:12.671 # the synch pair is identical
2025-07-01 05:46:12.679 yield ' ' + aelt
2025-07-01 05:46:12.689
2025-07-01 05:46:12.698 # pump out diffs from after the synch point
2025-07-01 05:46:12.704 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:12.710
2025-07-01 05:46:12.716 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:12.721 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:12.727
2025-07-01 05:46:12.733 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:12.743 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:12.750 alo = 145, ahi = 1101
2025-07-01 05:46:12.757 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:12.762 blo = 145, bhi = 1101
2025-07-01 05:46:12.767
2025-07-01 05:46:12.772 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:12.778 g = []
2025-07-01 05:46:12.783 if alo < ahi:
2025-07-01 05:46:12.789 if blo < bhi:
2025-07-01 05:46:12.794 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:12.801 else:
2025-07-01 05:46:12.807 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:12.814 elif blo < bhi:
2025-07-01 05:46:12.822 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:12.828
2025-07-01 05:46:12.834 > yield from g
2025-07-01 05:46:12.840
2025-07-01 05:46:12.845 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:12.856 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:12.865
2025-07-01 05:46:12.873 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:12.880 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:12.886 alo = 145, ahi = 1101
2025-07-01 05:46:12.895 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:12.906 blo = 145, bhi = 1101
2025-07-01 05:46:12.914
2025-07-01 05:46:12.922 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:12.930 r"""
2025-07-01 05:46:12.940 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:12.947 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:12.954 synch point, and intraline difference marking is done on the
2025-07-01 05:46:12.960 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:12.966
2025-07-01 05:46:12.972 Example:
2025-07-01 05:46:12.977
2025-07-01 05:46:12.982 >>> d = Differ()
2025-07-01 05:46:12.988 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:12.994 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:13.005 >>> print(''.join(results), end="")
2025-07-01 05:46:13.015 - abcDefghiJkl
2025-07-01 05:46:13.036 + abcdefGhijkl
2025-07-01 05:46:13.050 """
2025-07-01 05:46:13.060
2025-07-01 05:46:13.070 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:13.078 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:13.085 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:13.090 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:13.096 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:13.101
2025-07-01 05:46:13.107 # search for the pair that matches best without being identical
2025-07-01 05:46:13.114 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:13.125 # on junk -- unless we have to)
2025-07-01 05:46:13.137 for j in range(blo, bhi):
2025-07-01 05:46:13.147 bj = b[j]
2025-07-01 05:46:13.159 cruncher.set_seq2(bj)
2025-07-01 05:46:13.168 for i in range(alo, ahi):
2025-07-01 05:46:13.176 ai = a[i]
2025-07-01 05:46:13.183 if ai == bj:
2025-07-01 05:46:13.194 if eqi is None:
2025-07-01 05:46:13.205 eqi, eqj = i, j
2025-07-01 05:46:13.217 continue
2025-07-01 05:46:13.226 cruncher.set_seq1(ai)
2025-07-01 05:46:13.235 # computing similarity is expensive, so use the quick
2025-07-01 05:46:13.247 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:13.256 # compares by a factor of 3.
2025-07-01 05:46:13.267 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:13.276 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:13.284 # of the computation is cached by cruncher
2025-07-01 05:46:13.291 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:13.297 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:13.304 cruncher.ratio() > best_ratio:
2025-07-01 05:46:13.310 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:13.319 if best_ratio < cutoff:
2025-07-01 05:46:13.331 # no non-identical "pretty close" pair
2025-07-01 05:46:13.343 if eqi is None:
2025-07-01 05:46:13.354 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:13.365 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:13.375 return
2025-07-01 05:46:13.387 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:13.398 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:13.407 else:
2025-07-01 05:46:13.414 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:13.422 eqi = None
2025-07-01 05:46:13.428
2025-07-01 05:46:13.435 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:13.444 # identical
2025-07-01 05:46:13.454
2025-07-01 05:46:13.462 # pump out diffs from before the synch point
2025-07-01 05:46:13.471 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:13.483
2025-07-01 05:46:13.496 # do intraline marking on the synch pair
2025-07-01 05:46:13.508 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:13.521 if eqi is None:
2025-07-01 05:46:13.531 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:13.542 atags = btags = ""
2025-07-01 05:46:13.555 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:13.566 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:13.578 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:13.588 if tag == 'replace':
2025-07-01 05:46:13.597 atags += '^' * la
2025-07-01 05:46:13.605 btags += '^' * lb
2025-07-01 05:46:13.615 elif tag == 'delete':
2025-07-01 05:46:13.623 atags += '-' * la
2025-07-01 05:46:13.630 elif tag == 'insert':
2025-07-01 05:46:13.636 btags += '+' * lb
2025-07-01 05:46:13.642 elif tag == 'equal':
2025-07-01 05:46:13.647 atags += ' ' * la
2025-07-01 05:46:13.651 btags += ' ' * lb
2025-07-01 05:46:13.656 else:
2025-07-01 05:46:13.660 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:13.665 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:13.670 else:
2025-07-01 05:46:13.682 # the synch pair is identical
2025-07-01 05:46:13.692 yield ' ' + aelt
2025-07-01 05:46:13.702
2025-07-01 05:46:13.710 # pump out diffs from after the synch point
2025-07-01 05:46:13.716 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:13.721
2025-07-01 05:46:13.726 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:13.730 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:13.735
2025-07-01 05:46:13.739 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:13.744 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:13.748 alo = 146, ahi = 1101
2025-07-01 05:46:13.753 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:13.758 blo = 146, bhi = 1101
2025-07-01 05:46:13.762
2025-07-01 05:46:13.766 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:13.770 g = []
2025-07-01 05:46:13.775 if alo < ahi:
2025-07-01 05:46:13.779 if blo < bhi:
2025-07-01 05:46:13.784 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:13.788 else:
2025-07-01 05:46:13.792 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:13.797 elif blo < bhi:
2025-07-01 05:46:13.801 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:13.805
2025-07-01 05:46:13.810 > yield from g
2025-07-01 05:46:13.814
2025-07-01 05:46:13.818 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:13.823 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:13.828
2025-07-01 05:46:13.834 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:13.840 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:13.846 alo = 146, ahi = 1101
2025-07-01 05:46:13.853 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:13.860 blo = 146, bhi = 1101
2025-07-01 05:46:13.866
2025-07-01 05:46:13.874 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:13.880 r"""
2025-07-01 05:46:13.886 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:13.892 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:13.898 synch point, and intraline difference marking is done on the
2025-07-01 05:46:13.904 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:13.911
2025-07-01 05:46:13.918 Example:
2025-07-01 05:46:13.928
2025-07-01 05:46:13.934 >>> d = Differ()
2025-07-01 05:46:13.941 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:13.951 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:13.958 >>> print(''.join(results), end="")
2025-07-01 05:46:13.966 - abcDefghiJkl
2025-07-01 05:46:13.986 + abcdefGhijkl
2025-07-01 05:46:13.999 """
2025-07-01 05:46:14.007
2025-07-01 05:46:14.018 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:14.029 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:14.041 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:14.053 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:14.060 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:14.068
2025-07-01 05:46:14.075 # search for the pair that matches best without being identical
2025-07-01 05:46:14.081 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:14.086 # on junk -- unless we have to)
2025-07-01 05:46:14.093 for j in range(blo, bhi):
2025-07-01 05:46:14.101 bj = b[j]
2025-07-01 05:46:14.108 cruncher.set_seq2(bj)
2025-07-01 05:46:14.120 for i in range(alo, ahi):
2025-07-01 05:46:14.129 ai = a[i]
2025-07-01 05:46:14.136 if ai == bj:
2025-07-01 05:46:14.142 if eqi is None:
2025-07-01 05:46:14.149 eqi, eqj = i, j
2025-07-01 05:46:14.155 continue
2025-07-01 05:46:14.163 cruncher.set_seq1(ai)
2025-07-01 05:46:14.175 # computing similarity is expensive, so use the quick
2025-07-01 05:46:14.183 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:14.189 # compares by a factor of 3.
2025-07-01 05:46:14.196 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:14.203 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:14.214 # of the computation is cached by cruncher
2025-07-01 05:46:14.222 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:14.228 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:14.234 cruncher.ratio() > best_ratio:
2025-07-01 05:46:14.240 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:14.247 if best_ratio < cutoff:
2025-07-01 05:46:14.255 # no non-identical "pretty close" pair
2025-07-01 05:46:14.268 if eqi is None:
2025-07-01 05:46:14.282 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:14.291 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:14.298 return
2025-07-01 05:46:14.304 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:14.310 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:14.316 else:
2025-07-01 05:46:14.321 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:14.327 eqi = None
2025-07-01 05:46:14.333
2025-07-01 05:46:14.340 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:14.350 # identical
2025-07-01 05:46:14.357
2025-07-01 05:46:14.364 # pump out diffs from before the synch point
2025-07-01 05:46:14.371 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:14.377
2025-07-01 05:46:14.385 # do intraline marking on the synch pair
2025-07-01 05:46:14.396 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:14.404 if eqi is None:
2025-07-01 05:46:14.410 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:14.418 atags = btags = ""
2025-07-01 05:46:14.428 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:14.436 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:14.443 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:14.448 if tag == 'replace':
2025-07-01 05:46:14.456 atags += '^' * la
2025-07-01 05:46:14.464 btags += '^' * lb
2025-07-01 05:46:14.471 elif tag == 'delete':
2025-07-01 05:46:14.479 atags += '-' * la
2025-07-01 05:46:14.489 elif tag == 'insert':
2025-07-01 05:46:14.498 btags += '+' * lb
2025-07-01 05:46:14.506 elif tag == 'equal':
2025-07-01 05:46:14.511 atags += ' ' * la
2025-07-01 05:46:14.516 btags += ' ' * lb
2025-07-01 05:46:14.521 else:
2025-07-01 05:46:14.527 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:14.533 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:14.539 else:
2025-07-01 05:46:14.546 # the synch pair is identical
2025-07-01 05:46:14.556 yield ' ' + aelt
2025-07-01 05:46:14.567
2025-07-01 05:46:14.575 # pump out diffs from after the synch point
2025-07-01 05:46:14.581 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:14.588
2025-07-01 05:46:14.594 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:14.600 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:14.607
2025-07-01 05:46:14.614 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:14.622 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:14.629 alo = 147, ahi = 1101
2025-07-01 05:46:14.641 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:14.652 blo = 147, bhi = 1101
2025-07-01 05:46:14.664
2025-07-01 05:46:14.672 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:14.679 g = []
2025-07-01 05:46:14.686 if alo < ahi:
2025-07-01 05:46:14.696 if blo < bhi:
2025-07-01 05:46:14.707 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:14.719 else:
2025-07-01 05:46:14.728 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:14.735 elif blo < bhi:
2025-07-01 05:46:14.743 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:14.752
2025-07-01 05:46:14.762 > yield from g
2025-07-01 05:46:14.774
2025-07-01 05:46:14.781 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:14.788 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:14.795
2025-07-01 05:46:14.804 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:14.817 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:14.824 alo = 147, ahi = 1101
2025-07-01 05:46:14.831 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:14.836 blo = 147, bhi = 1101
2025-07-01 05:46:14.842
2025-07-01 05:46:14.848 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:14.860 r"""
2025-07-01 05:46:14.872 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:14.883 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:14.893 synch point, and intraline difference marking is done on the
2025-07-01 05:46:14.901 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:14.907
2025-07-01 05:46:14.913 Example:
2025-07-01 05:46:14.919
2025-07-01 05:46:14.925 >>> d = Differ()
2025-07-01 05:46:14.933 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:14.942 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:14.950 >>> print(''.join(results), end="")
2025-07-01 05:46:14.956 - abcDefghiJkl
2025-07-01 05:46:14.972 + abcdefGhijkl
2025-07-01 05:46:14.988 """
2025-07-01 05:46:14.994
2025-07-01 05:46:15.001 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:15.010 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:15.022 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:15.034 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:15.044 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:15.052
2025-07-01 05:46:15.060 # search for the pair that matches best without being identical
2025-07-01 05:46:15.066 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:15.072 # on junk -- unless we have to)
2025-07-01 05:46:15.078 for j in range(blo, bhi):
2025-07-01 05:46:15.088 bj = b[j]
2025-07-01 05:46:15.097 cruncher.set_seq2(bj)
2025-07-01 05:46:15.104 for i in range(alo, ahi):
2025-07-01 05:46:15.111 ai = a[i]
2025-07-01 05:46:15.119 if ai == bj:
2025-07-01 05:46:15.130 if eqi is None:
2025-07-01 05:46:15.139 eqi, eqj = i, j
2025-07-01 05:46:15.147 continue
2025-07-01 05:46:15.154 cruncher.set_seq1(ai)
2025-07-01 05:46:15.160 # computing similarity is expensive, so use the quick
2025-07-01 05:46:15.167 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:15.173 # compares by a factor of 3.
2025-07-01 05:46:15.179 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:15.187 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:15.198 # of the computation is cached by cruncher
2025-07-01 05:46:15.209 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:15.220 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:15.230 cruncher.ratio() > best_ratio:
2025-07-01 05:46:15.243 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:15.253 if best_ratio < cutoff:
2025-07-01 05:46:15.261 # no non-identical "pretty close" pair
2025-07-01 05:46:15.269 if eqi is None:
2025-07-01 05:46:15.276 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:15.282 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:15.292 return
2025-07-01 05:46:15.301 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:15.308 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:15.314 else:
2025-07-01 05:46:15.324 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:15.332 eqi = None
2025-07-01 05:46:15.339
2025-07-01 05:46:15.346 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:15.357 # identical
2025-07-01 05:46:15.368
2025-07-01 05:46:15.379 # pump out diffs from before the synch point
2025-07-01 05:46:15.386 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:15.393
2025-07-01 05:46:15.400 # do intraline marking on the synch pair
2025-07-01 05:46:15.405 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:15.412 if eqi is None:
2025-07-01 05:46:15.419 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:15.427 atags = btags = ""
2025-07-01 05:46:15.438 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:15.449 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:15.462 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:15.472 if tag == 'replace':
2025-07-01 05:46:15.481 atags += '^' * la
2025-07-01 05:46:15.488 btags += '^' * lb
2025-07-01 05:46:15.495 elif tag == 'delete':
2025-07-01 05:46:15.503 atags += '-' * la
2025-07-01 05:46:15.513 elif tag == 'insert':
2025-07-01 05:46:15.521 btags += '+' * lb
2025-07-01 05:46:15.529 elif tag == 'equal':
2025-07-01 05:46:15.535 atags += ' ' * la
2025-07-01 05:46:15.542 btags += ' ' * lb
2025-07-01 05:46:15.553 else:
2025-07-01 05:46:15.564 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:15.573 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:15.580 else:
2025-07-01 05:46:15.587 # the synch pair is identical
2025-07-01 05:46:15.595 yield ' ' + aelt
2025-07-01 05:46:15.606
2025-07-01 05:46:15.616 # pump out diffs from after the synch point
2025-07-01 05:46:15.624 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:15.631
2025-07-01 05:46:15.639 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:15.650 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:15.658
2025-07-01 05:46:15.665 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:15.675 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:15.686 alo = 148, ahi = 1101
2025-07-01 05:46:15.695 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:15.703 blo = 148, bhi = 1101
2025-07-01 05:46:15.710
2025-07-01 05:46:15.720 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:15.731 g = []
2025-07-01 05:46:15.740 if alo < ahi:
2025-07-01 05:46:15.747 if blo < bhi:
2025-07-01 05:46:15.754 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:15.760 else:
2025-07-01 05:46:15.766 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:15.772 elif blo < bhi:
2025-07-01 05:46:15.779 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:15.790
2025-07-01 05:46:15.800 > yield from g
2025-07-01 05:46:15.811
2025-07-01 05:46:15.820 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:15.828 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:15.834
2025-07-01 05:46:15.841 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:15.848 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:15.854 alo = 148, ahi = 1101
2025-07-01 05:46:15.860 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:15.866 blo = 148, bhi = 1101
2025-07-01 05:46:15.876
2025-07-01 05:46:15.886 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:15.893 r"""
2025-07-01 05:46:15.901 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:15.907 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:15.913 synch point, and intraline difference marking is done on the
2025-07-01 05:46:15.919 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:15.924
2025-07-01 05:46:15.930 Example:
2025-07-01 05:46:15.935
2025-07-01 05:46:15.943 >>> d = Differ()
2025-07-01 05:46:15.954 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:15.962 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:15.971 >>> print(''.join(results), end="")
2025-07-01 05:46:15.979 - abcDefghiJkl
2025-07-01 05:46:15.992 + abcdefGhijkl
2025-07-01 05:46:16.006 """
2025-07-01 05:46:16.012
2025-07-01 05:46:16.019 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:16.027 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:16.035 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:16.047 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:16.055 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:16.061
2025-07-01 05:46:16.067 # search for the pair that matches best without being identical
2025-07-01 05:46:16.072 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:16.077 # on junk -- unless we have to)
2025-07-01 05:46:16.083 for j in range(blo, bhi):
2025-07-01 05:46:16.094 bj = b[j]
2025-07-01 05:46:16.106 cruncher.set_seq2(bj)
2025-07-01 05:46:16.116 for i in range(alo, ahi):
2025-07-01 05:46:16.128 ai = a[i]
2025-07-01 05:46:16.137 if ai == bj:
2025-07-01 05:46:16.149 if eqi is None:
2025-07-01 05:46:16.155 eqi, eqj = i, j
2025-07-01 05:46:16.161 continue
2025-07-01 05:46:16.168 cruncher.set_seq1(ai)
2025-07-01 05:46:16.175 # computing similarity is expensive, so use the quick
2025-07-01 05:46:16.181 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:16.187 # compares by a factor of 3.
2025-07-01 05:46:16.193 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:16.200 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:16.206 # of the computation is cached by cruncher
2025-07-01 05:46:16.217 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:16.228 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:16.236 cruncher.ratio() > best_ratio:
2025-07-01 05:46:16.243 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:16.255 if best_ratio < cutoff:
2025-07-01 05:46:16.265 # no non-identical "pretty close" pair
2025-07-01 05:46:16.274 if eqi is None:
2025-07-01 05:46:16.282 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:16.288 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:16.294 return
2025-07-01 05:46:16.300 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:16.305 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:16.317 else:
2025-07-01 05:46:16.326 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:16.334 eqi = None
2025-07-01 05:46:16.346
2025-07-01 05:46:16.355 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:16.363 # identical
2025-07-01 05:46:16.370
2025-07-01 05:46:16.376 # pump out diffs from before the synch point
2025-07-01 05:46:16.382 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:16.388
2025-07-01 05:46:16.394 # do intraline marking on the synch pair
2025-07-01 05:46:16.401 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:16.411 if eqi is None:
2025-07-01 05:46:16.420 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:16.428 atags = btags = ""
2025-07-01 05:46:16.435 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:16.441 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:16.446 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:16.450 if tag == 'replace':
2025-07-01 05:46:16.455 atags += '^' * la
2025-07-01 05:46:16.459 btags += '^' * lb
2025-07-01 05:46:16.463 elif tag == 'delete':
2025-07-01 05:46:16.468 atags += '-' * la
2025-07-01 05:46:16.472 elif tag == 'insert':
2025-07-01 05:46:16.476 btags += '+' * lb
2025-07-01 05:46:16.481 elif tag == 'equal':
2025-07-01 05:46:16.485 atags += ' ' * la
2025-07-01 05:46:16.490 btags += ' ' * lb
2025-07-01 05:46:16.494 else:
2025-07-01 05:46:16.498 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:16.503 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:16.507 else:
2025-07-01 05:46:16.512 # the synch pair is identical
2025-07-01 05:46:16.516 yield ' ' + aelt
2025-07-01 05:46:16.520
2025-07-01 05:46:16.525 # pump out diffs from after the synch point
2025-07-01 05:46:16.529 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:16.534
2025-07-01 05:46:16.540 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:16.546 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:16.554
2025-07-01 05:46:16.564 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:16.574 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:16.580 alo = 149, ahi = 1101
2025-07-01 05:46:16.586 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:16.592 blo = 149, bhi = 1101
2025-07-01 05:46:16.598
2025-07-01 05:46:16.605 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:16.611 g = []
2025-07-01 05:46:16.618 if alo < ahi:
2025-07-01 05:46:16.629 if blo < bhi:
2025-07-01 05:46:16.638 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:16.646 else:
2025-07-01 05:46:16.652 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:16.658 elif blo < bhi:
2025-07-01 05:46:16.664 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:16.669
2025-07-01 05:46:16.675 > yield from g
2025-07-01 05:46:16.681
2025-07-01 05:46:16.687 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:16.693 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:16.699
2025-07-01 05:46:16.707 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:16.713 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:16.719 alo = 149, ahi = 1101
2025-07-01 05:46:16.725 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:16.730 blo = 149, bhi = 1101
2025-07-01 05:46:16.736
2025-07-01 05:46:16.742 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:16.747 r"""
2025-07-01 05:46:16.753 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:16.759 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:16.765 synch point, and intraline difference marking is done on the
2025-07-01 05:46:16.770 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:16.776
2025-07-01 05:46:16.782 Example:
2025-07-01 05:46:16.787
2025-07-01 05:46:16.793 >>> d = Differ()
2025-07-01 05:46:16.798 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:16.805 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:16.811 >>> print(''.join(results), end="")
2025-07-01 05:46:16.818 - abcDefghiJkl
2025-07-01 05:46:16.830 + abcdefGhijkl
2025-07-01 05:46:16.841 """
2025-07-01 05:46:16.846
2025-07-01 05:46:16.852 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:16.858 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:16.864 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:16.870 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:16.875 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:16.881
2025-07-01 05:46:16.887 # search for the pair that matches best without being identical
2025-07-01 05:46:16.893 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:16.898 # on junk -- unless we have to)
2025-07-01 05:46:16.904 for j in range(blo, bhi):
2025-07-01 05:46:16.917 bj = b[j]
2025-07-01 05:46:16.924 cruncher.set_seq2(bj)
2025-07-01 05:46:16.931 for i in range(alo, ahi):
2025-07-01 05:46:16.937 ai = a[i]
2025-07-01 05:46:16.944 if ai == bj:
2025-07-01 05:46:16.951 if eqi is None:
2025-07-01 05:46:16.957 eqi, eqj = i, j
2025-07-01 05:46:16.962 continue
2025-07-01 05:46:16.968 cruncher.set_seq1(ai)
2025-07-01 05:46:16.974 # computing similarity is expensive, so use the quick
2025-07-01 05:46:16.980 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:16.986 # compares by a factor of 3.
2025-07-01 05:46:16.996 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:17.003 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:17.012 # of the computation is cached by cruncher
2025-07-01 05:46:17.020 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:17.027 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:17.032 cruncher.ratio() > best_ratio:
2025-07-01 05:46:17.038 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:17.044 if best_ratio < cutoff:
2025-07-01 05:46:17.050 # no non-identical "pretty close" pair
2025-07-01 05:46:17.057 if eqi is None:
2025-07-01 05:46:17.063 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:17.069 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:17.076 return
2025-07-01 05:46:17.083 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:17.091 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:17.101 else:
2025-07-01 05:46:17.108 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:17.114 eqi = None
2025-07-01 05:46:17.120
2025-07-01 05:46:17.126 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:17.134 # identical
2025-07-01 05:46:17.142
2025-07-01 05:46:17.150 # pump out diffs from before the synch point
2025-07-01 05:46:17.161 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:17.171
2025-07-01 05:46:17.178 # do intraline marking on the synch pair
2025-07-01 05:46:17.190 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:17.200 if eqi is None:
2025-07-01 05:46:17.207 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:17.213 atags = btags = ""
2025-07-01 05:46:17.219 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:17.224 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:17.230 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:17.239 if tag == 'replace':
2025-07-01 05:46:17.249 atags += '^' * la
2025-07-01 05:46:17.256 btags += '^' * lb
2025-07-01 05:46:17.262 elif tag == 'delete':
2025-07-01 05:46:17.268 atags += '-' * la
2025-07-01 05:46:17.274 elif tag == 'insert':
2025-07-01 05:46:17.281 btags += '+' * lb
2025-07-01 05:46:17.286 elif tag == 'equal':
2025-07-01 05:46:17.291 atags += ' ' * la
2025-07-01 05:46:17.297 btags += ' ' * lb
2025-07-01 05:46:17.303 else:
2025-07-01 05:46:17.308 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:17.315 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:17.321 else:
2025-07-01 05:46:17.328 # the synch pair is identical
2025-07-01 05:46:17.334 yield ' ' + aelt
2025-07-01 05:46:17.345
2025-07-01 05:46:17.356 # pump out diffs from after the synch point
2025-07-01 05:46:17.364 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:17.370
2025-07-01 05:46:17.374 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:17.380 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:17.384
2025-07-01 05:46:17.390 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:17.396 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:17.401 alo = 150, ahi = 1101
2025-07-01 05:46:17.410 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:17.417 blo = 150, bhi = 1101
2025-07-01 05:46:17.427
2025-07-01 05:46:17.434 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:17.441 g = []
2025-07-01 05:46:17.446 if alo < ahi:
2025-07-01 05:46:17.452 if blo < bhi:
2025-07-01 05:46:17.457 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:17.461 else:
2025-07-01 05:46:17.466 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:17.470 elif blo < bhi:
2025-07-01 05:46:17.476 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:17.484
2025-07-01 05:46:17.494 > yield from g
2025-07-01 05:46:17.503
2025-07-01 05:46:17.512 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:17.520 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:17.526
2025-07-01 05:46:17.533 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:17.541 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:17.553 alo = 150, ahi = 1101
2025-07-01 05:46:17.563 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:17.570 blo = 150, bhi = 1101
2025-07-01 05:46:17.576
2025-07-01 05:46:17.587 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:17.597 r"""
2025-07-01 05:46:17.607 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:17.619 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:17.627 synch point, and intraline difference marking is done on the
2025-07-01 05:46:17.640 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:17.653
2025-07-01 05:46:17.661 Example:
2025-07-01 05:46:17.668
2025-07-01 05:46:17.675 >>> d = Differ()
2025-07-01 05:46:17.682 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:17.690 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:17.697 >>> print(''.join(results), end="")
2025-07-01 05:46:17.703 - abcDefghiJkl
2025-07-01 05:46:17.722 + abcdefGhijkl
2025-07-01 05:46:17.736 """
2025-07-01 05:46:17.749
2025-07-01 05:46:17.757 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:17.764 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:17.768 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:17.773 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:17.779 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:17.784
2025-07-01 05:46:17.790 # search for the pair that matches best without being identical
2025-07-01 05:46:17.802 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:17.812 # on junk -- unless we have to)
2025-07-01 05:46:17.823 for j in range(blo, bhi):
2025-07-01 05:46:17.833 bj = b[j]
2025-07-01 05:46:17.843 cruncher.set_seq2(bj)
2025-07-01 05:46:17.852 for i in range(alo, ahi):
2025-07-01 05:46:17.860 ai = a[i]
2025-07-01 05:46:17.873 if ai == bj:
2025-07-01 05:46:17.883 if eqi is None:
2025-07-01 05:46:17.890 eqi, eqj = i, j
2025-07-01 05:46:17.896 continue
2025-07-01 05:46:17.902 cruncher.set_seq1(ai)
2025-07-01 05:46:17.908 # computing similarity is expensive, so use the quick
2025-07-01 05:46:17.916 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:17.927 # compares by a factor of 3.
2025-07-01 05:46:17.935 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:17.942 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:17.951 # of the computation is cached by cruncher
2025-07-01 05:46:17.959 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:17.967 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:17.975 cruncher.ratio() > best_ratio:
2025-07-01 05:46:17.985 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:17.993 if best_ratio < cutoff:
2025-07-01 05:46:18.006 # no non-identical "pretty close" pair
2025-07-01 05:46:18.014 if eqi is None:
2025-07-01 05:46:18.023 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:18.032 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:18.040 return
2025-07-01 05:46:18.047 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:18.055 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:18.066 else:
2025-07-01 05:46:18.075 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:18.082 eqi = None
2025-07-01 05:46:18.093
2025-07-01 05:46:18.103 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:18.111 # identical
2025-07-01 05:46:18.119
2025-07-01 05:46:18.125 # pump out diffs from before the synch point
2025-07-01 05:46:18.136 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:18.145
2025-07-01 05:46:18.152 # do intraline marking on the synch pair
2025-07-01 05:46:18.160 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:18.166 if eqi is None:
2025-07-01 05:46:18.177 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:18.186 atags = btags = ""
2025-07-01 05:46:18.193 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:18.201 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:18.207 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:18.215 if tag == 'replace':
2025-07-01 05:46:18.225 atags += '^' * la
2025-07-01 05:46:18.233 btags += '^' * lb
2025-07-01 05:46:18.243 elif tag == 'delete':
2025-07-01 05:46:18.252 atags += '-' * la
2025-07-01 05:46:18.259 elif tag == 'insert':
2025-07-01 05:46:18.267 btags += '+' * lb
2025-07-01 05:46:18.273 elif tag == 'equal':
2025-07-01 05:46:18.285 atags += ' ' * la
2025-07-01 05:46:18.297 btags += ' ' * lb
2025-07-01 05:46:18.307 else:
2025-07-01 05:46:18.317 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:18.324 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:18.334 else:
2025-07-01 05:46:18.347 # the synch pair is identical
2025-07-01 05:46:18.358 yield ' ' + aelt
2025-07-01 05:46:18.367
2025-07-01 05:46:18.380 # pump out diffs from after the synch point
2025-07-01 05:46:18.391 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:18.403
2025-07-01 05:46:18.412 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:18.420 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:18.427
2025-07-01 05:46:18.433 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:18.438 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:18.443 alo = 151, ahi = 1101
2025-07-01 05:46:18.450 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:18.456 blo = 151, bhi = 1101
2025-07-01 05:46:18.461
2025-07-01 05:46:18.467 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:18.473 g = []
2025-07-01 05:46:18.478 if alo < ahi:
2025-07-01 05:46:18.484 if blo < bhi:
2025-07-01 05:46:18.490 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:18.498 else:
2025-07-01 05:46:18.506 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:18.511 elif blo < bhi:
2025-07-01 05:46:18.516 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:18.521
2025-07-01 05:46:18.527 > yield from g
2025-07-01 05:46:18.532
2025-07-01 05:46:18.537 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:18.543 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:18.551
2025-07-01 05:46:18.557 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:18.564 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:18.568 alo = 151, ahi = 1101
2025-07-01 05:46:18.574 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:18.578 blo = 151, bhi = 1101
2025-07-01 05:46:18.583
2025-07-01 05:46:18.589 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:18.594 r"""
2025-07-01 05:46:18.598 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:18.603 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:18.609 synch point, and intraline difference marking is done on the
2025-07-01 05:46:18.614 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:18.619
2025-07-01 05:46:18.625 Example:
2025-07-01 05:46:18.633
2025-07-01 05:46:18.640 >>> d = Differ()
2025-07-01 05:46:18.647 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:18.652 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:18.658 >>> print(''.join(results), end="")
2025-07-01 05:46:18.663 - abcDefghiJkl
2025-07-01 05:46:18.674 + abcdefGhijkl
2025-07-01 05:46:18.691 """
2025-07-01 05:46:18.697
2025-07-01 05:46:18.705 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:18.711 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:18.717 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:18.722 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:18.727 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:18.732
2025-07-01 05:46:18.737 # search for the pair that matches best without being identical
2025-07-01 05:46:18.742 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:18.747 # on junk -- unless we have to)
2025-07-01 05:46:18.753 for j in range(blo, bhi):
2025-07-01 05:46:18.759 bj = b[j]
2025-07-01 05:46:18.766 cruncher.set_seq2(bj)
2025-07-01 05:46:18.773 for i in range(alo, ahi):
2025-07-01 05:46:18.779 ai = a[i]
2025-07-01 05:46:18.786 if ai == bj:
2025-07-01 05:46:18.792 if eqi is None:
2025-07-01 05:46:18.799 eqi, eqj = i, j
2025-07-01 05:46:18.805 continue
2025-07-01 05:46:18.812 cruncher.set_seq1(ai)
2025-07-01 05:46:18.819 # computing similarity is expensive, so use the quick
2025-07-01 05:46:18.828 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:18.839 # compares by a factor of 3.
2025-07-01 05:46:18.847 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:18.854 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:18.860 # of the computation is cached by cruncher
2025-07-01 05:46:18.865 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:18.870 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:18.876 cruncher.ratio() > best_ratio:
2025-07-01 05:46:18.881 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:18.885 if best_ratio < cutoff:
2025-07-01 05:46:18.890 # no non-identical "pretty close" pair
2025-07-01 05:46:18.895 if eqi is None:
2025-07-01 05:46:18.900 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:18.906 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:18.911 return
2025-07-01 05:46:18.917 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:18.926 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:18.935 else:
2025-07-01 05:46:18.943 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:18.949 eqi = None
2025-07-01 05:46:18.955
2025-07-01 05:46:18.961 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:18.967 # identical
2025-07-01 05:46:18.975
2025-07-01 05:46:18.986 # pump out diffs from before the synch point
2025-07-01 05:46:18.994 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:19.002
2025-07-01 05:46:19.007 # do intraline marking on the synch pair
2025-07-01 05:46:19.012 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:19.017 if eqi is None:
2025-07-01 05:46:19.023 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:19.028 atags = btags = ""
2025-07-01 05:46:19.034 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:19.040 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:19.046 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:19.053 if tag == 'replace':
2025-07-01 05:46:19.058 atags += '^' * la
2025-07-01 05:46:19.062 btags += '^' * lb
2025-07-01 05:46:19.068 elif tag == 'delete':
2025-07-01 05:46:19.073 atags += '-' * la
2025-07-01 05:46:19.079 elif tag == 'insert':
2025-07-01 05:46:19.084 btags += '+' * lb
2025-07-01 05:46:19.090 elif tag == 'equal':
2025-07-01 05:46:19.101 atags += ' ' * la
2025-07-01 05:46:19.109 btags += ' ' * lb
2025-07-01 05:46:19.117 else:
2025-07-01 05:46:19.123 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:19.129 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:19.135 else:
2025-07-01 05:46:19.142 # the synch pair is identical
2025-07-01 05:46:19.148 yield ' ' + aelt
2025-07-01 05:46:19.155
2025-07-01 05:46:19.163 # pump out diffs from after the synch point
2025-07-01 05:46:19.173 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:19.182
2025-07-01 05:46:19.191 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:19.201 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:19.208
2025-07-01 05:46:19.219 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:19.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:19.235 alo = 152, ahi = 1101
2025-07-01 05:46:19.247 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:19.254 blo = 152, bhi = 1101
2025-07-01 05:46:19.260
2025-07-01 05:46:19.267 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:19.273 g = []
2025-07-01 05:46:19.280 if alo < ahi:
2025-07-01 05:46:19.286 if blo < bhi:
2025-07-01 05:46:19.293 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:19.300 else:
2025-07-01 05:46:19.308 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:19.314 elif blo < bhi:
2025-07-01 05:46:19.321 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:19.328
2025-07-01 05:46:19.335 > yield from g
2025-07-01 05:46:19.341
2025-07-01 05:46:19.347 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:19.354 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:19.360
2025-07-01 05:46:19.366 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:19.372 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:19.378 alo = 152, ahi = 1101
2025-07-01 05:46:19.387 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:19.396 blo = 152, bhi = 1101
2025-07-01 05:46:19.402
2025-07-01 05:46:19.409 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:19.416 r"""
2025-07-01 05:46:19.423 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:19.430 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:19.437 synch point, and intraline difference marking is done on the
2025-07-01 05:46:19.448 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:19.458
2025-07-01 05:46:19.466 Example:
2025-07-01 05:46:19.478
2025-07-01 05:46:19.487 >>> d = Differ()
2025-07-01 05:46:19.493 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:19.498 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:19.503 >>> print(''.join(results), end="")
2025-07-01 05:46:19.508 - abcDefghiJkl
2025-07-01 05:46:19.517 + abcdefGhijkl
2025-07-01 05:46:19.529 """
2025-07-01 05:46:19.535
2025-07-01 05:46:19.542 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:19.549 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:19.555 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:19.563 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:19.570 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:19.578
2025-07-01 05:46:19.587 # search for the pair that matches best without being identical
2025-07-01 05:46:19.598 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:19.606 # on junk -- unless we have to)
2025-07-01 05:46:19.612 for j in range(blo, bhi):
2025-07-01 05:46:19.618 bj = b[j]
2025-07-01 05:46:19.625 cruncher.set_seq2(bj)
2025-07-01 05:46:19.631 for i in range(alo, ahi):
2025-07-01 05:46:19.639 ai = a[i]
2025-07-01 05:46:19.650 if ai == bj:
2025-07-01 05:46:19.658 if eqi is None:
2025-07-01 05:46:19.665 eqi, eqj = i, j
2025-07-01 05:46:19.672 continue
2025-07-01 05:46:19.678 cruncher.set_seq1(ai)
2025-07-01 05:46:19.686 # computing similarity is expensive, so use the quick
2025-07-01 05:46:19.694 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:19.701 # compares by a factor of 3.
2025-07-01 05:46:19.708 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:19.716 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:19.724 # of the computation is cached by cruncher
2025-07-01 05:46:19.730 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:19.736 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:19.742 cruncher.ratio() > best_ratio:
2025-07-01 05:46:19.750 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:19.758 if best_ratio < cutoff:
2025-07-01 05:46:19.771 # no non-identical "pretty close" pair
2025-07-01 05:46:19.781 if eqi is None:
2025-07-01 05:46:19.788 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:19.795 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:19.801 return
2025-07-01 05:46:19.807 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:19.812 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:19.818 else:
2025-07-01 05:46:19.823 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:19.828 eqi = None
2025-07-01 05:46:19.833
2025-07-01 05:46:19.838 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:19.843 # identical
2025-07-01 05:46:19.850
2025-07-01 05:46:19.860 # pump out diffs from before the synch point
2025-07-01 05:46:19.868 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:19.873
2025-07-01 05:46:19.879 # do intraline marking on the synch pair
2025-07-01 05:46:19.885 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:19.890 if eqi is None:
2025-07-01 05:46:19.896 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:19.902 atags = btags = ""
2025-07-01 05:46:19.908 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:19.914 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:19.921 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:19.930 if tag == 'replace':
2025-07-01 05:46:19.942 atags += '^' * la
2025-07-01 05:46:19.954 btags += '^' * lb
2025-07-01 05:46:19.965 elif tag == 'delete':
2025-07-01 05:46:19.973 atags += '-' * la
2025-07-01 05:46:19.984 elif tag == 'insert':
2025-07-01 05:46:19.994 btags += '+' * lb
2025-07-01 05:46:20.004 elif tag == 'equal':
2025-07-01 05:46:20.017 atags += ' ' * la
2025-07-01 05:46:20.029 btags += ' ' * lb
2025-07-01 05:46:20.040 else:
2025-07-01 05:46:20.048 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:20.054 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:20.060 else:
2025-07-01 05:46:20.066 # the synch pair is identical
2025-07-01 05:46:20.071 yield ' ' + aelt
2025-07-01 05:46:20.078
2025-07-01 05:46:20.088 # pump out diffs from after the synch point
2025-07-01 05:46:20.100 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:20.110
2025-07-01 05:46:20.118 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:20.129 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:20.140
2025-07-01 05:46:20.153 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:20.164 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:20.174 alo = 153, ahi = 1101
2025-07-01 05:46:20.183 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:20.188 blo = 153, bhi = 1101
2025-07-01 05:46:20.193
2025-07-01 05:46:20.198 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:20.202 g = []
2025-07-01 05:46:20.210 if alo < ahi:
2025-07-01 05:46:20.219 if blo < bhi:
2025-07-01 05:46:20.226 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:20.233 else:
2025-07-01 05:46:20.239 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:20.245 elif blo < bhi:
2025-07-01 05:46:20.256 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:20.266
2025-07-01 05:46:20.274 > yield from g
2025-07-01 05:46:20.284
2025-07-01 05:46:20.299 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:20.308 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:20.316
2025-07-01 05:46:20.323 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:20.332 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:20.339 alo = 153, ahi = 1101
2025-07-01 05:46:20.348 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:20.355 blo = 153, bhi = 1101
2025-07-01 05:46:20.367
2025-07-01 05:46:20.377 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:20.384 r"""
2025-07-01 05:46:20.395 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:20.402 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:20.409 synch point, and intraline difference marking is done on the
2025-07-01 05:46:20.416 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:20.421
2025-07-01 05:46:20.426 Example:
2025-07-01 05:46:20.431
2025-07-01 05:46:20.437 >>> d = Differ()
2025-07-01 05:46:20.441 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:20.446 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:20.450 >>> print(''.join(results), end="")
2025-07-01 05:46:20.454 - abcDefghiJkl
2025-07-01 05:46:20.463 + abcdefGhijkl
2025-07-01 05:46:20.474 """
2025-07-01 05:46:20.479
2025-07-01 05:46:20.485 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:20.496 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:20.506 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:20.514 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:20.520 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:20.526
2025-07-01 05:46:20.536 # search for the pair that matches best without being identical
2025-07-01 05:46:20.545 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:20.553 # on junk -- unless we have to)
2025-07-01 05:46:20.559 for j in range(blo, bhi):
2025-07-01 05:46:20.564 bj = b[j]
2025-07-01 05:46:20.569 cruncher.set_seq2(bj)
2025-07-01 05:46:20.574 for i in range(alo, ahi):
2025-07-01 05:46:20.579 ai = a[i]
2025-07-01 05:46:20.587 if ai == bj:
2025-07-01 05:46:20.595 if eqi is None:
2025-07-01 05:46:20.601 eqi, eqj = i, j
2025-07-01 05:46:20.606 continue
2025-07-01 05:46:20.611 cruncher.set_seq1(ai)
2025-07-01 05:46:20.619 # computing similarity is expensive, so use the quick
2025-07-01 05:46:20.630 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:20.640 # compares by a factor of 3.
2025-07-01 05:46:20.648 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:20.656 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:20.662 # of the computation is cached by cruncher
2025-07-01 05:46:20.668 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:20.673 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:20.677 cruncher.ratio() > best_ratio:
2025-07-01 05:46:20.682 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:20.686 if best_ratio < cutoff:
2025-07-01 05:46:20.692 # no non-identical "pretty close" pair
2025-07-01 05:46:20.698 if eqi is None:
2025-07-01 05:46:20.704 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:20.710 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:20.716 return
2025-07-01 05:46:20.723 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:20.728 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:20.734 else:
2025-07-01 05:46:20.740 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:20.746 eqi = None
2025-07-01 05:46:20.751
2025-07-01 05:46:20.757 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:20.763 # identical
2025-07-01 05:46:20.769
2025-07-01 05:46:20.780 # pump out diffs from before the synch point
2025-07-01 05:46:20.789 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:20.795
2025-07-01 05:46:20.802 # do intraline marking on the synch pair
2025-07-01 05:46:20.807 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:20.811 if eqi is None:
2025-07-01 05:46:20.816 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:20.820 atags = btags = ""
2025-07-01 05:46:20.825 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:20.831 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:20.837 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:20.841 if tag == 'replace':
2025-07-01 05:46:20.845 atags += '^' * la
2025-07-01 05:46:20.853 btags += '^' * lb
2025-07-01 05:46:20.859 elif tag == 'delete':
2025-07-01 05:46:20.864 atags += '-' * la
2025-07-01 05:46:20.869 elif tag == 'insert':
2025-07-01 05:46:20.874 btags += '+' * lb
2025-07-01 05:46:20.878 elif tag == 'equal':
2025-07-01 05:46:20.882 atags += ' ' * la
2025-07-01 05:46:20.888 btags += ' ' * lb
2025-07-01 05:46:20.893 else:
2025-07-01 05:46:20.899 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:20.905 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:20.911 else:
2025-07-01 05:46:20.918 # the synch pair is identical
2025-07-01 05:46:20.928 yield ' ' + aelt
2025-07-01 05:46:20.936
2025-07-01 05:46:20.943 # pump out diffs from after the synch point
2025-07-01 05:46:20.948 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:20.954
2025-07-01 05:46:20.958 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:20.963 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:20.969
2025-07-01 05:46:20.976 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:20.984 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:20.991 alo = 154, ahi = 1101
2025-07-01 05:46:20.999 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:21.006 blo = 154, bhi = 1101
2025-07-01 05:46:21.013
2025-07-01 05:46:21.020 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:21.027 g = []
2025-07-01 05:46:21.034 if alo < ahi:
2025-07-01 05:46:21.045 if blo < bhi:
2025-07-01 05:46:21.055 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:21.063 else:
2025-07-01 05:46:21.070 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:21.077 elif blo < bhi:
2025-07-01 05:46:21.083 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:21.089
2025-07-01 05:46:21.095 > yield from g
2025-07-01 05:46:21.100
2025-07-01 05:46:21.106 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:21.112 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:21.118
2025-07-01 05:46:21.129 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:21.142 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:21.153 alo = 154, ahi = 1101
2025-07-01 05:46:21.166 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:21.174 blo = 154, bhi = 1101
2025-07-01 05:46:21.181
2025-07-01 05:46:21.188 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:21.194 r"""
2025-07-01 05:46:21.204 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:21.213 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:21.220 synch point, and intraline difference marking is done on the
2025-07-01 05:46:21.226 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:21.235
2025-07-01 05:46:21.244 Example:
2025-07-01 05:46:21.250
2025-07-01 05:46:21.257 >>> d = Differ()
2025-07-01 05:46:21.262 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:21.272 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:21.282 >>> print(''.join(results), end="")
2025-07-01 05:46:21.292 - abcDefghiJkl
2025-07-01 05:46:21.315 + abcdefGhijkl
2025-07-01 05:46:21.331 """
2025-07-01 05:46:21.342
2025-07-01 05:46:21.353 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:21.365 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:21.374 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:21.387 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:21.393 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:21.397
2025-07-01 05:46:21.402 # search for the pair that matches best without being identical
2025-07-01 05:46:21.408 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:21.413 # on junk -- unless we have to)
2025-07-01 05:46:21.420 for j in range(blo, bhi):
2025-07-01 05:46:21.426 bj = b[j]
2025-07-01 05:46:21.437 cruncher.set_seq2(bj)
2025-07-01 05:46:21.445 for i in range(alo, ahi):
2025-07-01 05:46:21.454 ai = a[i]
2025-07-01 05:46:21.461 if ai == bj:
2025-07-01 05:46:21.469 if eqi is None:
2025-07-01 05:46:21.476 eqi, eqj = i, j
2025-07-01 05:46:21.482 continue
2025-07-01 05:46:21.488 cruncher.set_seq1(ai)
2025-07-01 05:46:21.500 # computing similarity is expensive, so use the quick
2025-07-01 05:46:21.509 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:21.516 # compares by a factor of 3.
2025-07-01 05:46:21.524 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:21.533 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:21.546 # of the computation is cached by cruncher
2025-07-01 05:46:21.555 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:21.563 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:21.568 cruncher.ratio() > best_ratio:
2025-07-01 05:46:21.574 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:21.579 if best_ratio < cutoff:
2025-07-01 05:46:21.585 # no non-identical "pretty close" pair
2025-07-01 05:46:21.591 if eqi is None:
2025-07-01 05:46:21.598 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:21.606 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:21.613 return
2025-07-01 05:46:21.618 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:21.624 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:21.630 else:
2025-07-01 05:46:21.636 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:21.642 eqi = None
2025-07-01 05:46:21.648
2025-07-01 05:46:21.654 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:21.664 # identical
2025-07-01 05:46:21.673
2025-07-01 05:46:21.681 # pump out diffs from before the synch point
2025-07-01 05:46:21.687 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:21.692
2025-07-01 05:46:21.698 # do intraline marking on the synch pair
2025-07-01 05:46:21.703 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:21.709 if eqi is None:
2025-07-01 05:46:21.715 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:21.726 atags = btags = ""
2025-07-01 05:46:21.734 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:21.739 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:21.745 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:21.751 if tag == 'replace':
2025-07-01 05:46:21.756 atags += '^' * la
2025-07-01 05:46:21.762 btags += '^' * lb
2025-07-01 05:46:21.768 elif tag == 'delete':
2025-07-01 05:46:21.774 atags += '-' * la
2025-07-01 05:46:21.779 elif tag == 'insert':
2025-07-01 05:46:21.785 btags += '+' * lb
2025-07-01 05:46:21.791 elif tag == 'equal':
2025-07-01 05:46:21.796 atags += ' ' * la
2025-07-01 05:46:21.801 btags += ' ' * lb
2025-07-01 05:46:21.806 else:
2025-07-01 05:46:21.810 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:21.816 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:21.821 else:
2025-07-01 05:46:21.826 # the synch pair is identical
2025-07-01 05:46:21.832 yield ' ' + aelt
2025-07-01 05:46:21.838
2025-07-01 05:46:21.845 # pump out diffs from after the synch point
2025-07-01 05:46:21.851 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:21.857
2025-07-01 05:46:21.863 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:21.870 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:21.877
2025-07-01 05:46:21.883 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:21.888 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:21.892 alo = 155, ahi = 1101
2025-07-01 05:46:21.898 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:21.902 blo = 155, bhi = 1101
2025-07-01 05:46:21.906
2025-07-01 05:46:21.911 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:21.918 g = []
2025-07-01 05:46:21.923 if alo < ahi:
2025-07-01 05:46:21.928 if blo < bhi:
2025-07-01 05:46:21.934 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:21.939 else:
2025-07-01 05:46:21.945 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:21.951 elif blo < bhi:
2025-07-01 05:46:21.957 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:21.963
2025-07-01 05:46:21.968 > yield from g
2025-07-01 05:46:21.974
2025-07-01 05:46:21.980 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:21.986 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:21.994
2025-07-01 05:46:22.001 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:22.008 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:22.013 alo = 155, ahi = 1101
2025-07-01 05:46:22.019 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:22.025 blo = 155, bhi = 1101
2025-07-01 05:46:22.030
2025-07-01 05:46:22.041 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:22.052 r"""
2025-07-01 05:46:22.059 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:22.067 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:22.073 synch point, and intraline difference marking is done on the
2025-07-01 05:46:22.079 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:22.084
2025-07-01 05:46:22.089 Example:
2025-07-01 05:46:22.095
2025-07-01 05:46:22.101 >>> d = Differ()
2025-07-01 05:46:22.107 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:22.113 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:22.119 >>> print(''.join(results), end="")
2025-07-01 05:46:22.127 - abcDefghiJkl
2025-07-01 05:46:22.139 + abcdefGhijkl
2025-07-01 05:46:22.151 """
2025-07-01 05:46:22.157
2025-07-01 05:46:22.163 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:22.169 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:22.175 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:22.181 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:22.188 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:22.194
2025-07-01 05:46:22.200 # search for the pair that matches best without being identical
2025-07-01 05:46:22.207 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:22.213 # on junk -- unless we have to)
2025-07-01 05:46:22.219 for j in range(blo, bhi):
2025-07-01 05:46:22.225 bj = b[j]
2025-07-01 05:46:22.230 cruncher.set_seq2(bj)
2025-07-01 05:46:22.236 for i in range(alo, ahi):
2025-07-01 05:46:22.242 ai = a[i]
2025-07-01 05:46:22.248 if ai == bj:
2025-07-01 05:46:22.254 if eqi is None:
2025-07-01 05:46:22.260 eqi, eqj = i, j
2025-07-01 05:46:22.266 continue
2025-07-01 05:46:22.272 cruncher.set_seq1(ai)
2025-07-01 05:46:22.278 # computing similarity is expensive, so use the quick
2025-07-01 05:46:22.285 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:22.291 # compares by a factor of 3.
2025-07-01 05:46:22.297 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:22.303 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:22.309 # of the computation is cached by cruncher
2025-07-01 05:46:22.315 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:22.321 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:22.327 cruncher.ratio() > best_ratio:
2025-07-01 05:46:22.333 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:22.340 if best_ratio < cutoff:
2025-07-01 05:46:22.351 # no non-identical "pretty close" pair
2025-07-01 05:46:22.358 if eqi is None:
2025-07-01 05:46:22.366 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:22.375 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:22.384 return
2025-07-01 05:46:22.391 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:22.397 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:22.402 else:
2025-07-01 05:46:22.410 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:22.416 eqi = None
2025-07-01 05:46:22.422
2025-07-01 05:46:22.430 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:22.437 # identical
2025-07-01 05:46:22.443
2025-07-01 05:46:22.448 # pump out diffs from before the synch point
2025-07-01 05:46:22.453 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:22.458
2025-07-01 05:46:22.463 # do intraline marking on the synch pair
2025-07-01 05:46:22.469 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:22.475 if eqi is None:
2025-07-01 05:46:22.481 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:22.487 atags = btags = ""
2025-07-01 05:46:22.494 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:22.504 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:22.512 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:22.518 if tag == 'replace':
2025-07-01 05:46:22.523 atags += '^' * la
2025-07-01 05:46:22.528 btags += '^' * lb
2025-07-01 05:46:22.534 elif tag == 'delete':
2025-07-01 05:46:22.540 atags += '-' * la
2025-07-01 05:46:22.545 elif tag == 'insert':
2025-07-01 05:46:22.551 btags += '+' * lb
2025-07-01 05:46:22.557 elif tag == 'equal':
2025-07-01 05:46:22.563 atags += ' ' * la
2025-07-01 05:46:22.569 btags += ' ' * lb
2025-07-01 05:46:22.575 else:
2025-07-01 05:46:22.581 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:22.587 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:22.592 else:
2025-07-01 05:46:22.598 # the synch pair is identical
2025-07-01 05:46:22.604 yield ' ' + aelt
2025-07-01 05:46:22.610
2025-07-01 05:46:22.616 # pump out diffs from after the synch point
2025-07-01 05:46:22.622 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:22.627
2025-07-01 05:46:22.633 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:22.640 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:22.646
2025-07-01 05:46:22.652 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:22.658 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:22.664 alo = 158, ahi = 1101
2025-07-01 05:46:22.670 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:22.676 blo = 158, bhi = 1101
2025-07-01 05:46:22.681
2025-07-01 05:46:22.687 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:22.693 g = []
2025-07-01 05:46:22.704 if alo < ahi:
2025-07-01 05:46:22.713 if blo < bhi:
2025-07-01 05:46:22.721 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:22.728 else:
2025-07-01 05:46:22.734 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:22.740 elif blo < bhi:
2025-07-01 05:46:22.746 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:22.752
2025-07-01 05:46:22.758 > yield from g
2025-07-01 05:46:22.764
2025-07-01 05:46:22.770 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:22.780 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:22.789
2025-07-01 05:46:22.796 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:22.802 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:22.807 alo = 158, ahi = 1101
2025-07-01 05:46:22.814 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:22.823 blo = 158, bhi = 1101
2025-07-01 05:46:22.831
2025-07-01 05:46:22.837 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:22.843 r"""
2025-07-01 05:46:22.848 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:22.861 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:22.873 synch point, and intraline difference marking is done on the
2025-07-01 05:46:22.885 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:22.897
2025-07-01 05:46:22.907 Example:
2025-07-01 05:46:22.919
2025-07-01 05:46:22.931 >>> d = Differ()
2025-07-01 05:46:22.939 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:22.949 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:22.961 >>> print(''.join(results), end="")
2025-07-01 05:46:22.972 - abcDefghiJkl
2025-07-01 05:46:22.988 + abcdefGhijkl
2025-07-01 05:46:23.001 """
2025-07-01 05:46:23.009
2025-07-01 05:46:23.015 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:23.020 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:23.024 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:23.029 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:23.033 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:23.037
2025-07-01 05:46:23.041 # search for the pair that matches best without being identical
2025-07-01 05:46:23.046 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:23.050 # on junk -- unless we have to)
2025-07-01 05:46:23.055 for j in range(blo, bhi):
2025-07-01 05:46:23.059 bj = b[j]
2025-07-01 05:46:23.063 cruncher.set_seq2(bj)
2025-07-01 05:46:23.067 for i in range(alo, ahi):
2025-07-01 05:46:23.072 ai = a[i]
2025-07-01 05:46:23.078 if ai == bj:
2025-07-01 05:46:23.083 if eqi is None:
2025-07-01 05:46:23.087 eqi, eqj = i, j
2025-07-01 05:46:23.092 continue
2025-07-01 05:46:23.096 cruncher.set_seq1(ai)
2025-07-01 05:46:23.101 # computing similarity is expensive, so use the quick
2025-07-01 05:46:23.105 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:23.110 # compares by a factor of 3.
2025-07-01 05:46:23.115 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:23.122 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:23.131 # of the computation is cached by cruncher
2025-07-01 05:46:23.137 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:23.144 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:23.151 cruncher.ratio() > best_ratio:
2025-07-01 05:46:23.159 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:23.170 if best_ratio < cutoff:
2025-07-01 05:46:23.179 # no non-identical "pretty close" pair
2025-07-01 05:46:23.186 if eqi is None:
2025-07-01 05:46:23.193 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:23.199 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:23.205 return
2025-07-01 05:46:23.211 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:23.217 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:23.222 else:
2025-07-01 05:46:23.233 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:23.245 eqi = None
2025-07-01 05:46:23.255
2025-07-01 05:46:23.264 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:23.271 # identical
2025-07-01 05:46:23.278
2025-07-01 05:46:23.284 # pump out diffs from before the synch point
2025-07-01 05:46:23.289 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:23.297
2025-07-01 05:46:23.305 # do intraline marking on the synch pair
2025-07-01 05:46:23.311 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:23.316 if eqi is None:
2025-07-01 05:46:23.321 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:23.325 atags = btags = ""
2025-07-01 05:46:23.330 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:23.334 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:23.338 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:23.343 if tag == 'replace':
2025-07-01 05:46:23.347 atags += '^' * la
2025-07-01 05:46:23.351 btags += '^' * lb
2025-07-01 05:46:23.356 elif tag == 'delete':
2025-07-01 05:46:23.360 atags += '-' * la
2025-07-01 05:46:23.364 elif tag == 'insert':
2025-07-01 05:46:23.368 btags += '+' * lb
2025-07-01 05:46:23.373 elif tag == 'equal':
2025-07-01 05:46:23.377 atags += ' ' * la
2025-07-01 05:46:23.382 btags += ' ' * lb
2025-07-01 05:46:23.386 else:
2025-07-01 05:46:23.390 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:23.394 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:23.399 else:
2025-07-01 05:46:23.405 # the synch pair is identical
2025-07-01 05:46:23.409 yield ' ' + aelt
2025-07-01 05:46:23.413
2025-07-01 05:46:23.417 # pump out diffs from after the synch point
2025-07-01 05:46:23.422 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:23.426
2025-07-01 05:46:23.430 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:23.435 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:23.439
2025-07-01 05:46:23.444 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:23.449 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:23.453 alo = 159, ahi = 1101
2025-07-01 05:46:23.458 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:23.462 blo = 159, bhi = 1101
2025-07-01 05:46:23.466
2025-07-01 05:46:23.470 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:23.475 g = []
2025-07-01 05:46:23.482 if alo < ahi:
2025-07-01 05:46:23.489 if blo < bhi:
2025-07-01 05:46:23.495 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:23.501 else:
2025-07-01 05:46:23.508 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:23.519 elif blo < bhi:
2025-07-01 05:46:23.529 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:23.536
2025-07-01 05:46:23.543 > yield from g
2025-07-01 05:46:23.550
2025-07-01 05:46:23.561 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:23.571 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:23.580
2025-07-01 05:46:23.586 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:23.594 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:23.601 alo = 159, ahi = 1101
2025-07-01 05:46:23.607 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:23.611 blo = 159, bhi = 1101
2025-07-01 05:46:23.615
2025-07-01 05:46:23.620 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:23.625 r"""
2025-07-01 05:46:23.629 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:23.633 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:23.638 synch point, and intraline difference marking is done on the
2025-07-01 05:46:23.642 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:23.649
2025-07-01 05:46:23.657 Example:
2025-07-01 05:46:23.665
2025-07-01 05:46:23.672 >>> d = Differ()
2025-07-01 05:46:23.679 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:23.686 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:23.691 >>> print(''.join(results), end="")
2025-07-01 05:46:23.696 - abcDefghiJkl
2025-07-01 05:46:23.718 + abcdefGhijkl
2025-07-01 05:46:23.739 """
2025-07-01 05:46:23.749
2025-07-01 05:46:23.758 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:23.766 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:23.773 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:23.781 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:23.787 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:23.792
2025-07-01 05:46:23.798 # search for the pair that matches best without being identical
2025-07-01 05:46:23.805 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:23.817 # on junk -- unless we have to)
2025-07-01 05:46:23.827 for j in range(blo, bhi):
2025-07-01 05:46:23.835 bj = b[j]
2025-07-01 05:46:23.842 cruncher.set_seq2(bj)
2025-07-01 05:46:23.849 for i in range(alo, ahi):
2025-07-01 05:46:23.854 ai = a[i]
2025-07-01 05:46:23.858 if ai == bj:
2025-07-01 05:46:23.863 if eqi is None:
2025-07-01 05:46:23.867 eqi, eqj = i, j
2025-07-01 05:46:23.871 continue
2025-07-01 05:46:23.875 cruncher.set_seq1(ai)
2025-07-01 05:46:23.879 # computing similarity is expensive, so use the quick
2025-07-01 05:46:23.884 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:23.888 # compares by a factor of 3.
2025-07-01 05:46:23.893 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:23.897 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:23.903 # of the computation is cached by cruncher
2025-07-01 05:46:23.908 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:23.914 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:23.919 cruncher.ratio() > best_ratio:
2025-07-01 05:46:23.925 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:23.930 if best_ratio < cutoff:
2025-07-01 05:46:23.935 # no non-identical "pretty close" pair
2025-07-01 05:46:23.939 if eqi is None:
2025-07-01 05:46:23.943 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:23.947 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:23.952 return
2025-07-01 05:46:23.956 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:23.960 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:23.965 else:
2025-07-01 05:46:23.969 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:23.973 eqi = None
2025-07-01 05:46:23.978
2025-07-01 05:46:23.989 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:23.996 # identical
2025-07-01 05:46:24.002
2025-07-01 05:46:24.008 # pump out diffs from before the synch point
2025-07-01 05:46:24.014 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:24.024
2025-07-01 05:46:24.034 # do intraline marking on the synch pair
2025-07-01 05:46:24.041 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:24.047 if eqi is None:
2025-07-01 05:46:24.052 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:24.056 atags = btags = ""
2025-07-01 05:46:24.061 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:24.067 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:24.072 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:24.078 if tag == 'replace':
2025-07-01 05:46:24.084 atags += '^' * la
2025-07-01 05:46:24.090 btags += '^' * lb
2025-07-01 05:46:24.103 elif tag == 'delete':
2025-07-01 05:46:24.114 atags += '-' * la
2025-07-01 05:46:24.125 elif tag == 'insert':
2025-07-01 05:46:24.135 btags += '+' * lb
2025-07-01 05:46:24.141 elif tag == 'equal':
2025-07-01 05:46:24.147 atags += ' ' * la
2025-07-01 05:46:24.158 btags += ' ' * lb
2025-07-01 05:46:24.165 else:
2025-07-01 05:46:24.171 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:24.179 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:24.186 else:
2025-07-01 05:46:24.193 # the synch pair is identical
2025-07-01 05:46:24.199 yield ' ' + aelt
2025-07-01 05:46:24.205
2025-07-01 05:46:24.211 # pump out diffs from after the synch point
2025-07-01 05:46:24.217 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:24.223
2025-07-01 05:46:24.229 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:24.235 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:24.240
2025-07-01 05:46:24.247 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:24.255 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:24.263 alo = 160, ahi = 1101
2025-07-01 05:46:24.274 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:24.281 blo = 160, bhi = 1101
2025-07-01 05:46:24.288
2025-07-01 05:46:24.294 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:24.304 g = []
2025-07-01 05:46:24.314 if alo < ahi:
2025-07-01 05:46:24.321 if blo < bhi:
2025-07-01 05:46:24.327 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:24.334 else:
2025-07-01 05:46:24.340 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:24.347 elif blo < bhi:
2025-07-01 05:46:24.353 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:24.358
2025-07-01 05:46:24.363 > yield from g
2025-07-01 05:46:24.369
2025-07-01 05:46:24.374 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:24.385 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:24.395
2025-07-01 05:46:24.403 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:24.414 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:24.422 alo = 160, ahi = 1101
2025-07-01 05:46:24.430 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:24.438 blo = 160, bhi = 1101
2025-07-01 05:46:24.444
2025-07-01 05:46:24.451 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:24.457 r"""
2025-07-01 05:46:24.463 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:24.467 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:24.472 synch point, and intraline difference marking is done on the
2025-07-01 05:46:24.476 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:24.480
2025-07-01 05:46:24.485 Example:
2025-07-01 05:46:24.492
2025-07-01 05:46:24.498 >>> d = Differ()
2025-07-01 05:46:24.504 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:24.509 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:24.514 >>> print(''.join(results), end="")
2025-07-01 05:46:24.524 - abcDefghiJkl
2025-07-01 05:46:24.540 + abcdefGhijkl
2025-07-01 05:46:24.557 """
2025-07-01 05:46:24.564
2025-07-01 05:46:24.570 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:24.580 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:24.591 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:24.600 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:24.611 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:24.621
2025-07-01 05:46:24.631 # search for the pair that matches best without being identical
2025-07-01 05:46:24.641 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:24.651 # on junk -- unless we have to)
2025-07-01 05:46:24.661 for j in range(blo, bhi):
2025-07-01 05:46:24.668 bj = b[j]
2025-07-01 05:46:24.675 cruncher.set_seq2(bj)
2025-07-01 05:46:24.682 for i in range(alo, ahi):
2025-07-01 05:46:24.688 ai = a[i]
2025-07-01 05:46:24.695 if ai == bj:
2025-07-01 05:46:24.702 if eqi is None:
2025-07-01 05:46:24.708 eqi, eqj = i, j
2025-07-01 05:46:24.715 continue
2025-07-01 05:46:24.724 cruncher.set_seq1(ai)
2025-07-01 05:46:24.737 # computing similarity is expensive, so use the quick
2025-07-01 05:46:24.746 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:24.753 # compares by a factor of 3.
2025-07-01 05:46:24.760 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:24.766 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:24.771 # of the computation is cached by cruncher
2025-07-01 05:46:24.776 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:24.780 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:24.785 cruncher.ratio() > best_ratio:
2025-07-01 05:46:24.792 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:24.799 if best_ratio < cutoff:
2025-07-01 05:46:24.806 # no non-identical "pretty close" pair
2025-07-01 05:46:24.813 if eqi is None:
2025-07-01 05:46:24.819 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:24.825 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:24.832 return
2025-07-01 05:46:24.844 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:24.852 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:24.858 else:
2025-07-01 05:46:24.864 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:24.873 eqi = None
2025-07-01 05:46:24.880
2025-07-01 05:46:24.887 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:24.894 # identical
2025-07-01 05:46:24.904
2025-07-01 05:46:24.914 # pump out diffs from before the synch point
2025-07-01 05:46:24.921 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:24.927
2025-07-01 05:46:24.932 # do intraline marking on the synch pair
2025-07-01 05:46:24.938 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:24.942 if eqi is None:
2025-07-01 05:46:24.947 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:24.952 atags = btags = ""
2025-07-01 05:46:24.956 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:24.963 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:24.970 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:24.977 if tag == 'replace':
2025-07-01 05:46:24.984 atags += '^' * la
2025-07-01 05:46:24.990 btags += '^' * lb
2025-07-01 05:46:24.997 elif tag == 'delete':
2025-07-01 05:46:25.004 atags += '-' * la
2025-07-01 05:46:25.010 elif tag == 'insert':
2025-07-01 05:46:25.017 btags += '+' * lb
2025-07-01 05:46:25.024 elif tag == 'equal':
2025-07-01 05:46:25.030 atags += ' ' * la
2025-07-01 05:46:25.041 btags += ' ' * lb
2025-07-01 05:46:25.050 else:
2025-07-01 05:46:25.057 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:25.063 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:25.069 else:
2025-07-01 05:46:25.074 # the synch pair is identical
2025-07-01 05:46:25.082 yield ' ' + aelt
2025-07-01 05:46:25.089
2025-07-01 05:46:25.094 # pump out diffs from after the synch point
2025-07-01 05:46:25.101 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:25.107
2025-07-01 05:46:25.113 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:25.119 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:25.124
2025-07-01 05:46:25.130 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:25.135 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:25.141 alo = 161, ahi = 1101
2025-07-01 05:46:25.147 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:25.153 blo = 161, bhi = 1101
2025-07-01 05:46:25.161
2025-07-01 05:46:25.167 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:25.174 g = []
2025-07-01 05:46:25.181 if alo < ahi:
2025-07-01 05:46:25.187 if blo < bhi:
2025-07-01 05:46:25.195 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:25.202 else:
2025-07-01 05:46:25.209 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:25.215 elif blo < bhi:
2025-07-01 05:46:25.222 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:25.232
2025-07-01 05:46:25.243 > yield from g
2025-07-01 05:46:25.252
2025-07-01 05:46:25.259 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:25.270 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:25.281
2025-07-01 05:46:25.292 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:25.302 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:25.311 alo = 161, ahi = 1101
2025-07-01 05:46:25.322 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:25.330 blo = 161, bhi = 1101
2025-07-01 05:46:25.338
2025-07-01 05:46:25.343 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:25.349 r"""
2025-07-01 05:46:25.355 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:25.362 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:25.369 synch point, and intraline difference marking is done on the
2025-07-01 05:46:25.379 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:25.390
2025-07-01 05:46:25.402 Example:
2025-07-01 05:46:25.412
2025-07-01 05:46:25.421 >>> d = Differ()
2025-07-01 05:46:25.432 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:25.442 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:25.454 >>> print(''.join(results), end="")
2025-07-01 05:46:25.464 - abcDefghiJkl
2025-07-01 05:46:25.480 + abcdefGhijkl
2025-07-01 05:46:25.496 """
2025-07-01 05:46:25.506
2025-07-01 05:46:25.513 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:25.524 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:25.536 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:25.546 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:25.554 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:25.562
2025-07-01 05:46:25.568 # search for the pair that matches best without being identical
2025-07-01 05:46:25.574 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:25.580 # on junk -- unless we have to)
2025-07-01 05:46:25.586 for j in range(blo, bhi):
2025-07-01 05:46:25.595 bj = b[j]
2025-07-01 05:46:25.604 cruncher.set_seq2(bj)
2025-07-01 05:46:25.613 for i in range(alo, ahi):
2025-07-01 05:46:25.620 ai = a[i]
2025-07-01 05:46:25.626 if ai == bj:
2025-07-01 05:46:25.634 if eqi is None:
2025-07-01 05:46:25.643 eqi, eqj = i, j
2025-07-01 05:46:25.653 continue
2025-07-01 05:46:25.665 cruncher.set_seq1(ai)
2025-07-01 05:46:25.675 # computing similarity is expensive, so use the quick
2025-07-01 05:46:25.681 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:25.688 # compares by a factor of 3.
2025-07-01 05:46:25.696 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:25.702 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:25.710 # of the computation is cached by cruncher
2025-07-01 05:46:25.718 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:25.725 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:25.731 cruncher.ratio() > best_ratio:
2025-07-01 05:46:25.736 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:25.741 if best_ratio < cutoff:
2025-07-01 05:46:25.745 # no non-identical "pretty close" pair
2025-07-01 05:46:25.750 if eqi is None:
2025-07-01 05:46:25.754 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:25.759 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:25.763 return
2025-07-01 05:46:25.768 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:25.773 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:25.777 else:
2025-07-01 05:46:25.782 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:25.787 eqi = None
2025-07-01 05:46:25.791
2025-07-01 05:46:25.796 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:25.800 # identical
2025-07-01 05:46:25.805
2025-07-01 05:46:25.811 # pump out diffs from before the synch point
2025-07-01 05:46:25.817 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:25.823
2025-07-01 05:46:25.830 # do intraline marking on the synch pair
2025-07-01 05:46:25.837 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:25.843 if eqi is None:
2025-07-01 05:46:25.850 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:25.861 atags = btags = ""
2025-07-01 05:46:25.870 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:25.878 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:25.884 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:25.890 if tag == 'replace':
2025-07-01 05:46:25.896 atags += '^' * la
2025-07-01 05:46:25.902 btags += '^' * lb
2025-07-01 05:46:25.909 elif tag == 'delete':
2025-07-01 05:46:25.916 atags += '-' * la
2025-07-01 05:46:25.923 elif tag == 'insert':
2025-07-01 05:46:25.930 btags += '+' * lb
2025-07-01 05:46:25.937 elif tag == 'equal':
2025-07-01 05:46:25.944 atags += ' ' * la
2025-07-01 05:46:25.950 btags += ' ' * lb
2025-07-01 05:46:25.960 else:
2025-07-01 05:46:25.969 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:25.977 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:25.982 else:
2025-07-01 05:46:25.988 # the synch pair is identical
2025-07-01 05:46:25.993 yield ' ' + aelt
2025-07-01 05:46:25.999
2025-07-01 05:46:26.005 # pump out diffs from after the synch point
2025-07-01 05:46:26.010 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:26.016
2025-07-01 05:46:26.021 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:26.027 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:26.033
2025-07-01 05:46:26.039 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:26.047 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:26.054 alo = 162, ahi = 1101
2025-07-01 05:46:26.064 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:26.075 blo = 162, bhi = 1101
2025-07-01 05:46:26.083
2025-07-01 05:46:26.089 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:26.095 g = []
2025-07-01 05:46:26.101 if alo < ahi:
2025-07-01 05:46:26.107 if blo < bhi:
2025-07-01 05:46:26.115 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:26.123 else:
2025-07-01 05:46:26.133 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:26.140 elif blo < bhi:
2025-07-01 05:46:26.147 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:26.152
2025-07-01 05:46:26.157 > yield from g
2025-07-01 05:46:26.162
2025-07-01 05:46:26.167 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:26.172 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:26.177
2025-07-01 05:46:26.181 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:26.187 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:26.192 alo = 162, ahi = 1101
2025-07-01 05:46:26.197 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:26.202 blo = 162, bhi = 1101
2025-07-01 05:46:26.206
2025-07-01 05:46:26.211 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:26.216 r"""
2025-07-01 05:46:26.222 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:26.228 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:26.234 synch point, and intraline difference marking is done on the
2025-07-01 05:46:26.242 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:26.249
2025-07-01 05:46:26.255 Example:
2025-07-01 05:46:26.262
2025-07-01 05:46:26.268 >>> d = Differ()
2025-07-01 05:46:26.275 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:26.281 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:26.287 >>> print(''.join(results), end="")
2025-07-01 05:46:26.294 - abcDefghiJkl
2025-07-01 05:46:26.307 + abcdefGhijkl
2025-07-01 05:46:26.329 """
2025-07-01 05:46:26.339
2025-07-01 05:46:26.353 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:26.368 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:26.379 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:26.386 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:26.392 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:26.402
2025-07-01 05:46:26.408 # search for the pair that matches best without being identical
2025-07-01 05:46:26.413 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:26.419 # on junk -- unless we have to)
2025-07-01 05:46:26.424 for j in range(blo, bhi):
2025-07-01 05:46:26.430 bj = b[j]
2025-07-01 05:46:26.436 cruncher.set_seq2(bj)
2025-07-01 05:46:26.442 for i in range(alo, ahi):
2025-07-01 05:46:26.449 ai = a[i]
2025-07-01 05:46:26.455 if ai == bj:
2025-07-01 05:46:26.460 if eqi is None:
2025-07-01 05:46:26.466 eqi, eqj = i, j
2025-07-01 05:46:26.472 continue
2025-07-01 05:46:26.479 cruncher.set_seq1(ai)
2025-07-01 05:46:26.487 # computing similarity is expensive, so use the quick
2025-07-01 05:46:26.495 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:26.505 # compares by a factor of 3.
2025-07-01 05:46:26.514 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:26.521 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:26.526 # of the computation is cached by cruncher
2025-07-01 05:46:26.531 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:26.536 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:26.541 cruncher.ratio() > best_ratio:
2025-07-01 05:46:26.546 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:26.551 if best_ratio < cutoff:
2025-07-01 05:46:26.556 # no non-identical "pretty close" pair
2025-07-01 05:46:26.561 if eqi is None:
2025-07-01 05:46:26.567 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:26.575 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:26.583 return
2025-07-01 05:46:26.590 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:26.596 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:26.602 else:
2025-07-01 05:46:26.609 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:26.614 eqi = None
2025-07-01 05:46:26.619
2025-07-01 05:46:26.624 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:26.629 # identical
2025-07-01 05:46:26.634
2025-07-01 05:46:26.639 # pump out diffs from before the synch point
2025-07-01 05:46:26.645 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:26.650
2025-07-01 05:46:26.655 # do intraline marking on the synch pair
2025-07-01 05:46:26.661 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:26.666 if eqi is None:
2025-07-01 05:46:26.671 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:26.675 atags = btags = ""
2025-07-01 05:46:26.681 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:26.687 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:26.693 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:26.699 if tag == 'replace':
2025-07-01 05:46:26.710 atags += '^' * la
2025-07-01 05:46:26.717 btags += '^' * lb
2025-07-01 05:46:26.723 elif tag == 'delete':
2025-07-01 05:46:26.733 atags += '-' * la
2025-07-01 05:46:26.745 elif tag == 'insert':
2025-07-01 05:46:26.753 btags += '+' * lb
2025-07-01 05:46:26.759 elif tag == 'equal':
2025-07-01 05:46:26.766 atags += ' ' * la
2025-07-01 05:46:26.776 btags += ' ' * lb
2025-07-01 05:46:26.786 else:
2025-07-01 05:46:26.793 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:26.799 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:26.804 else:
2025-07-01 05:46:26.809 # the synch pair is identical
2025-07-01 05:46:26.813 yield ' ' + aelt
2025-07-01 05:46:26.818
2025-07-01 05:46:26.823 # pump out diffs from after the synch point
2025-07-01 05:46:26.828 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:26.834
2025-07-01 05:46:26.839 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:26.845 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:26.853
2025-07-01 05:46:26.860 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:26.866 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:26.870 alo = 163, ahi = 1101
2025-07-01 05:46:26.876 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:26.883 blo = 163, bhi = 1101
2025-07-01 05:46:26.893
2025-07-01 05:46:26.900 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:26.906 g = []
2025-07-01 05:46:26.915 if alo < ahi:
2025-07-01 05:46:26.925 if blo < bhi:
2025-07-01 05:46:26.934 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:26.942 else:
2025-07-01 05:46:26.948 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:26.953 elif blo < bhi:
2025-07-01 05:46:26.959 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:26.964
2025-07-01 05:46:26.970 > yield from g
2025-07-01 05:46:26.975
2025-07-01 05:46:26.979 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:26.984 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:26.989
2025-07-01 05:46:26.994 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:26.999 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:27.005 alo = 163, ahi = 1101
2025-07-01 05:46:27.011 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:27.017 blo = 163, bhi = 1101
2025-07-01 05:46:27.023
2025-07-01 05:46:27.029 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:27.036 r"""
2025-07-01 05:46:27.042 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:27.049 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:27.056 synch point, and intraline difference marking is done on the
2025-07-01 05:46:27.063 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:27.073
2025-07-01 05:46:27.083 Example:
2025-07-01 05:46:27.091
2025-07-01 05:46:27.100 >>> d = Differ()
2025-07-01 05:46:27.107 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:27.114 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:27.123 >>> print(''.join(results), end="")
2025-07-01 05:46:27.134 - abcDefghiJkl
2025-07-01 05:46:27.146 + abcdefGhijkl
2025-07-01 05:46:27.160 """
2025-07-01 05:46:27.166
2025-07-01 05:46:27.173 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:27.180 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:27.187 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:27.194 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:27.206 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:27.215
2025-07-01 05:46:27.221 # search for the pair that matches best without being identical
2025-07-01 05:46:27.227 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:27.233 # on junk -- unless we have to)
2025-07-01 05:46:27.240 for j in range(blo, bhi):
2025-07-01 05:46:27.247 bj = b[j]
2025-07-01 05:46:27.253 cruncher.set_seq2(bj)
2025-07-01 05:46:27.260 for i in range(alo, ahi):
2025-07-01 05:46:27.266 ai = a[i]
2025-07-01 05:46:27.272 if ai == bj:
2025-07-01 05:46:27.278 if eqi is None:
2025-07-01 05:46:27.283 eqi, eqj = i, j
2025-07-01 05:46:27.289 continue
2025-07-01 05:46:27.295 cruncher.set_seq1(ai)
2025-07-01 05:46:27.301 # computing similarity is expensive, so use the quick
2025-07-01 05:46:27.312 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:27.321 # compares by a factor of 3.
2025-07-01 05:46:27.329 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:27.334 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:27.340 # of the computation is cached by cruncher
2025-07-01 05:46:27.344 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:27.349 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:27.354 cruncher.ratio() > best_ratio:
2025-07-01 05:46:27.360 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:27.366 if best_ratio < cutoff:
2025-07-01 05:46:27.371 # no non-identical "pretty close" pair
2025-07-01 05:46:27.377 if eqi is None:
2025-07-01 05:46:27.383 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:27.389 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:27.395 return
2025-07-01 05:46:27.401 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:27.407 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:27.412 else:
2025-07-01 05:46:27.418 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:27.424 eqi = None
2025-07-01 05:46:27.430
2025-07-01 05:46:27.436 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:27.442 # identical
2025-07-01 05:46:27.448
2025-07-01 05:46:27.454 # pump out diffs from before the synch point
2025-07-01 05:46:27.460 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:27.465
2025-07-01 05:46:27.471 # do intraline marking on the synch pair
2025-07-01 05:46:27.477 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:27.483 if eqi is None:
2025-07-01 05:46:27.488 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:27.494 atags = btags = ""
2025-07-01 05:46:27.500 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:27.506 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:27.511 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:27.517 if tag == 'replace':
2025-07-01 05:46:27.523 atags += '^' * la
2025-07-01 05:46:27.529 btags += '^' * lb
2025-07-01 05:46:27.534 elif tag == 'delete':
2025-07-01 05:46:27.540 atags += '-' * la
2025-07-01 05:46:27.546 elif tag == 'insert':
2025-07-01 05:46:27.552 btags += '+' * lb
2025-07-01 05:46:27.558 elif tag == 'equal':
2025-07-01 05:46:27.564 atags += ' ' * la
2025-07-01 05:46:27.569 btags += ' ' * lb
2025-07-01 05:46:27.578 else:
2025-07-01 05:46:27.586 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:27.593 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:27.599 else:
2025-07-01 05:46:27.603 # the synch pair is identical
2025-07-01 05:46:27.608 yield ' ' + aelt
2025-07-01 05:46:27.613
2025-07-01 05:46:27.617 # pump out diffs from after the synch point
2025-07-01 05:46:27.622 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:27.628
2025-07-01 05:46:27.636 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:27.648 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:27.656
2025-07-01 05:46:27.663 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:27.673 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:27.684 alo = 164, ahi = 1101
2025-07-01 05:46:27.692 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:27.702 blo = 164, bhi = 1101
2025-07-01 05:46:27.714
2025-07-01 05:46:27.724 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:27.735 g = []
2025-07-01 05:46:27.745 if alo < ahi:
2025-07-01 05:46:27.755 if blo < bhi:
2025-07-01 05:46:27.764 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:27.777 else:
2025-07-01 05:46:27.790 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:27.801 elif blo < bhi:
2025-07-01 05:46:27.808 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:27.816
2025-07-01 05:46:27.822 > yield from g
2025-07-01 05:46:27.829
2025-07-01 05:46:27.837 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:27.849 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:27.859
2025-07-01 05:46:27.867 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:27.876 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:27.884 alo = 164, ahi = 1101
2025-07-01 05:46:27.892 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:27.902 blo = 164, bhi = 1101
2025-07-01 05:46:27.910
2025-07-01 05:46:27.917 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:27.927 r"""
2025-07-01 05:46:27.935 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:27.943 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:27.950 synch point, and intraline difference marking is done on the
2025-07-01 05:46:27.960 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:27.969
2025-07-01 05:46:27.977 Example:
2025-07-01 05:46:27.982
2025-07-01 05:46:27.989 >>> d = Differ()
2025-07-01 05:46:27.995 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:28.000 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:28.006 >>> print(''.join(results), end="")
2025-07-01 05:46:28.016 - abcDefghiJkl
2025-07-01 05:46:28.032 + abcdefGhijkl
2025-07-01 05:46:28.044 """
2025-07-01 05:46:28.057
2025-07-01 05:46:28.068 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:28.076 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:28.082 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:28.087 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:28.092 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:28.097
2025-07-01 05:46:28.102 # search for the pair that matches best without being identical
2025-07-01 05:46:28.107 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:28.112 # on junk -- unless we have to)
2025-07-01 05:46:28.120 for j in range(blo, bhi):
2025-07-01 05:46:28.131 bj = b[j]
2025-07-01 05:46:28.141 cruncher.set_seq2(bj)
2025-07-01 05:46:28.147 for i in range(alo, ahi):
2025-07-01 05:46:28.156 ai = a[i]
2025-07-01 05:46:28.168 if ai == bj:
2025-07-01 05:46:28.179 if eqi is None:
2025-07-01 05:46:28.188 eqi, eqj = i, j
2025-07-01 05:46:28.195 continue
2025-07-01 05:46:28.202 cruncher.set_seq1(ai)
2025-07-01 05:46:28.208 # computing similarity is expensive, so use the quick
2025-07-01 05:46:28.214 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:28.220 # compares by a factor of 3.
2025-07-01 05:46:28.226 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:28.236 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:28.247 # of the computation is cached by cruncher
2025-07-01 05:46:28.255 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:28.262 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:28.272 cruncher.ratio() > best_ratio:
2025-07-01 05:46:28.280 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:28.286 if best_ratio < cutoff:
2025-07-01 05:46:28.294 # no non-identical "pretty close" pair
2025-07-01 05:46:28.302 if eqi is None:
2025-07-01 05:46:28.310 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:28.317 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:28.322 return
2025-07-01 05:46:28.327 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:28.332 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:28.337 else:
2025-07-01 05:46:28.343 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:28.354 eqi = None
2025-07-01 05:46:28.364
2025-07-01 05:46:28.372 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:28.380 # identical
2025-07-01 05:46:28.387
2025-07-01 05:46:28.395 # pump out diffs from before the synch point
2025-07-01 05:46:28.402 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:28.408
2025-07-01 05:46:28.415 # do intraline marking on the synch pair
2025-07-01 05:46:28.422 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:28.433 if eqi is None:
2025-07-01 05:46:28.441 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:28.448 atags = btags = ""
2025-07-01 05:46:28.454 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:28.459 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:28.463 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:28.468 if tag == 'replace':
2025-07-01 05:46:28.473 atags += '^' * la
2025-07-01 05:46:28.478 btags += '^' * lb
2025-07-01 05:46:28.484 elif tag == 'delete':
2025-07-01 05:46:28.491 atags += '-' * la
2025-07-01 05:46:28.502 elif tag == 'insert':
2025-07-01 05:46:28.509 btags += '+' * lb
2025-07-01 05:46:28.516 elif tag == 'equal':
2025-07-01 05:46:28.522 atags += ' ' * la
2025-07-01 05:46:28.533 btags += ' ' * lb
2025-07-01 05:46:28.543 else:
2025-07-01 05:46:28.551 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:28.559 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:28.567 else:
2025-07-01 05:46:28.578 # the synch pair is identical
2025-07-01 05:46:28.587 yield ' ' + aelt
2025-07-01 05:46:28.595
2025-07-01 05:46:28.602 # pump out diffs from after the synch point
2025-07-01 05:46:28.608 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:28.614
2025-07-01 05:46:28.622 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:28.630 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:28.638
2025-07-01 05:46:28.647 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:28.659 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:28.670 alo = 165, ahi = 1101
2025-07-01 05:46:28.680 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:28.688 blo = 165, bhi = 1101
2025-07-01 05:46:28.695
2025-07-01 05:46:28.700 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:28.708 g = []
2025-07-01 05:46:28.719 if alo < ahi:
2025-07-01 05:46:28.728 if blo < bhi:
2025-07-01 05:46:28.738 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:28.744 else:
2025-07-01 05:46:28.750 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:28.759 elif blo < bhi:
2025-07-01 05:46:28.770 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:28.781
2025-07-01 05:46:28.788 > yield from g
2025-07-01 05:46:28.793
2025-07-01 05:46:28.799 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:28.804 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:28.809
2025-07-01 05:46:28.815 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:28.821 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:28.834 alo = 165, ahi = 1101
2025-07-01 05:46:28.843 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:28.854 blo = 165, bhi = 1101
2025-07-01 05:46:28.862
2025-07-01 05:46:28.871 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:28.878 r"""
2025-07-01 05:46:28.885 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:28.892 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:28.899 synch point, and intraline difference marking is done on the
2025-07-01 05:46:28.905 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:28.912
2025-07-01 05:46:28.918 Example:
2025-07-01 05:46:28.925
2025-07-01 05:46:28.932 >>> d = Differ()
2025-07-01 05:46:28.939 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:28.946 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:28.957 >>> print(''.join(results), end="")
2025-07-01 05:46:28.965 - abcDefghiJkl
2025-07-01 05:46:28.979 + abcdefGhijkl
2025-07-01 05:46:28.996 """
2025-07-01 05:46:29.002
2025-07-01 05:46:29.008 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:29.014 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:29.021 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:29.028 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:29.035 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:29.041
2025-07-01 05:46:29.048 # search for the pair that matches best without being identical
2025-07-01 05:46:29.055 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:29.062 # on junk -- unless we have to)
2025-07-01 05:46:29.069 for j in range(blo, bhi):
2025-07-01 05:46:29.076 bj = b[j]
2025-07-01 05:46:29.082 cruncher.set_seq2(bj)
2025-07-01 05:46:29.089 for i in range(alo, ahi):
2025-07-01 05:46:29.096 ai = a[i]
2025-07-01 05:46:29.103 if ai == bj:
2025-07-01 05:46:29.110 if eqi is None:
2025-07-01 05:46:29.116 eqi, eqj = i, j
2025-07-01 05:46:29.123 continue
2025-07-01 05:46:29.133 cruncher.set_seq1(ai)
2025-07-01 05:46:29.143 # computing similarity is expensive, so use the quick
2025-07-01 05:46:29.152 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:29.158 # compares by a factor of 3.
2025-07-01 05:46:29.164 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:29.169 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:29.174 # of the computation is cached by cruncher
2025-07-01 05:46:29.180 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:29.187 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:29.195 cruncher.ratio() > best_ratio:
2025-07-01 05:46:29.201 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:29.207 if best_ratio < cutoff:
2025-07-01 05:46:29.213 # no non-identical "pretty close" pair
2025-07-01 05:46:29.219 if eqi is None:
2025-07-01 05:46:29.225 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:29.232 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:29.243 return
2025-07-01 05:46:29.256 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:29.268 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:29.279 else:
2025-07-01 05:46:29.287 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:29.295 eqi = None
2025-07-01 05:46:29.302
2025-07-01 05:46:29.311 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:29.324 # identical
2025-07-01 05:46:29.332
2025-07-01 05:46:29.338 # pump out diffs from before the synch point
2025-07-01 05:46:29.347 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:29.355
2025-07-01 05:46:29.361 # do intraline marking on the synch pair
2025-07-01 05:46:29.367 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:29.375 if eqi is None:
2025-07-01 05:46:29.387 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:29.397 atags = btags = ""
2025-07-01 05:46:29.405 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:29.411 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:29.417 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:29.427 if tag == 'replace':
2025-07-01 05:46:29.437 atags += '^' * la
2025-07-01 05:46:29.449 btags += '^' * lb
2025-07-01 05:46:29.460 elif tag == 'delete':
2025-07-01 05:46:29.469 atags += '-' * la
2025-07-01 05:46:29.476 elif tag == 'insert':
2025-07-01 05:46:29.488 btags += '+' * lb
2025-07-01 05:46:29.499 elif tag == 'equal':
2025-07-01 05:46:29.510 atags += ' ' * la
2025-07-01 05:46:29.520 btags += ' ' * lb
2025-07-01 05:46:29.531 else:
2025-07-01 05:46:29.540 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:29.548 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:29.555 else:
2025-07-01 05:46:29.561 # the synch pair is identical
2025-07-01 05:46:29.566 yield ' ' + aelt
2025-07-01 05:46:29.576
2025-07-01 05:46:29.585 # pump out diffs from after the synch point
2025-07-01 05:46:29.593 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:29.601
2025-07-01 05:46:29.607 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:29.613 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:29.622
2025-07-01 05:46:29.635 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:29.646 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:29.661 alo = 166, ahi = 1101
2025-07-01 05:46:29.672 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:29.680 blo = 166, bhi = 1101
2025-07-01 05:46:29.687
2025-07-01 05:46:29.701 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:29.711 g = []
2025-07-01 05:46:29.720 if alo < ahi:
2025-07-01 05:46:29.733 if blo < bhi:
2025-07-01 05:46:29.749 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:29.761 else:
2025-07-01 05:46:29.771 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:29.782 elif blo < bhi:
2025-07-01 05:46:29.795 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:29.804
2025-07-01 05:46:29.810 > yield from g
2025-07-01 05:46:29.817
2025-07-01 05:46:29.822 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:29.828 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:29.837
2025-07-01 05:46:29.849 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:29.859 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:29.871 alo = 166, ahi = 1101
2025-07-01 05:46:29.884 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:29.895 blo = 166, bhi = 1101
2025-07-01 05:46:29.902
2025-07-01 05:46:29.912 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:29.923 r"""
2025-07-01 05:46:29.935 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:29.947 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:29.957 synch point, and intraline difference marking is done on the
2025-07-01 05:46:29.967 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:29.975
2025-07-01 05:46:29.980 Example:
2025-07-01 05:46:29.986
2025-07-01 05:46:29.993 >>> d = Differ()
2025-07-01 05:46:29.999 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:30.005 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:30.010 >>> print(''.join(results), end="")
2025-07-01 05:46:30.015 - abcDefghiJkl
2025-07-01 05:46:30.026 + abcdefGhijkl
2025-07-01 05:46:30.039 """
2025-07-01 05:46:30.050
2025-07-01 05:46:30.059 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:30.067 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:30.076 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:30.085 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:30.093 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:30.100
2025-07-01 05:46:30.105 # search for the pair that matches best without being identical
2025-07-01 05:46:30.110 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:30.114 # on junk -- unless we have to)
2025-07-01 05:46:30.119 for j in range(blo, bhi):
2025-07-01 05:46:30.126 bj = b[j]
2025-07-01 05:46:30.136 cruncher.set_seq2(bj)
2025-07-01 05:46:30.143 for i in range(alo, ahi):
2025-07-01 05:46:30.150 ai = a[i]
2025-07-01 05:46:30.155 if ai == bj:
2025-07-01 05:46:30.161 if eqi is None:
2025-07-01 05:46:30.167 eqi, eqj = i, j
2025-07-01 05:46:30.173 continue
2025-07-01 05:46:30.182 cruncher.set_seq1(ai)
2025-07-01 05:46:30.189 # computing similarity is expensive, so use the quick
2025-07-01 05:46:30.195 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:30.201 # compares by a factor of 3.
2025-07-01 05:46:30.207 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:30.213 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:30.219 # of the computation is cached by cruncher
2025-07-01 05:46:30.228 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:30.240 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:30.250 cruncher.ratio() > best_ratio:
2025-07-01 05:46:30.260 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:30.269 if best_ratio < cutoff:
2025-07-01 05:46:30.278 # no non-identical "pretty close" pair
2025-07-01 05:46:30.291 if eqi is None:
2025-07-01 05:46:30.304 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:30.312 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:30.323 return
2025-07-01 05:46:30.333 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:30.341 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:30.349 else:
2025-07-01 05:46:30.355 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:30.361 eqi = None
2025-07-01 05:46:30.366
2025-07-01 05:46:30.374 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:30.381 # identical
2025-07-01 05:46:30.389
2025-07-01 05:46:30.399 # pump out diffs from before the synch point
2025-07-01 05:46:30.407 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:30.416
2025-07-01 05:46:30.427 # do intraline marking on the synch pair
2025-07-01 05:46:30.437 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:30.445 if eqi is None:
2025-07-01 05:46:30.451 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:30.459 atags = btags = ""
2025-07-01 05:46:30.472 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:30.483 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:30.491 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:30.501 if tag == 'replace':
2025-07-01 05:46:30.507 atags += '^' * la
2025-07-01 05:46:30.514 btags += '^' * lb
2025-07-01 05:46:30.521 elif tag == 'delete':
2025-07-01 05:46:30.528 atags += '-' * la
2025-07-01 05:46:30.533 elif tag == 'insert':
2025-07-01 05:46:30.541 btags += '+' * lb
2025-07-01 05:46:30.546 elif tag == 'equal':
2025-07-01 05:46:30.557 atags += ' ' * la
2025-07-01 05:46:30.566 btags += ' ' * lb
2025-07-01 05:46:30.578 else:
2025-07-01 05:46:30.587 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:30.594 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:30.605 else:
2025-07-01 05:46:30.614 # the synch pair is identical
2025-07-01 05:46:30.621 yield ' ' + aelt
2025-07-01 05:46:30.627
2025-07-01 05:46:30.632 # pump out diffs from after the synch point
2025-07-01 05:46:30.637 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:30.643
2025-07-01 05:46:30.648 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:30.654 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:30.664
2025-07-01 05:46:30.674 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:30.682 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:30.688 alo = 167, ahi = 1101
2025-07-01 05:46:30.694 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:30.704 blo = 167, bhi = 1101
2025-07-01 05:46:30.714
2025-07-01 05:46:30.721 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:30.729 g = []
2025-07-01 05:46:30.737 if alo < ahi:
2025-07-01 05:46:30.749 if blo < bhi:
2025-07-01 05:46:30.761 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:30.769 else:
2025-07-01 05:46:30.776 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:30.783 elif blo < bhi:
2025-07-01 05:46:30.789 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:30.795
2025-07-01 05:46:30.801 > yield from g
2025-07-01 05:46:30.807
2025-07-01 05:46:30.813 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:30.819 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:30.825
2025-07-01 05:46:30.832 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:30.838 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:30.844 alo = 167, ahi = 1101
2025-07-01 05:46:30.850 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:30.856 blo = 167, bhi = 1101
2025-07-01 05:46:30.863
2025-07-01 05:46:30.874 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:30.882 r"""
2025-07-01 05:46:30.891 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:30.902 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:30.911 synch point, and intraline difference marking is done on the
2025-07-01 05:46:30.919 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:30.927
2025-07-01 05:46:30.937 Example:
2025-07-01 05:46:30.945
2025-07-01 05:46:30.956 >>> d = Differ()
2025-07-01 05:46:30.966 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:30.972 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:30.978 >>> print(''.join(results), end="")
2025-07-01 05:46:30.989 - abcDefghiJkl
2025-07-01 05:46:31.008 + abcdefGhijkl
2025-07-01 05:46:31.018 """
2025-07-01 05:46:31.023
2025-07-01 05:46:31.031 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:31.042 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:31.050 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:31.058 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:31.065 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:31.071
2025-07-01 05:46:31.080 # search for the pair that matches best without being identical
2025-07-01 05:46:31.090 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:31.096 # on junk -- unless we have to)
2025-07-01 05:46:31.102 for j in range(blo, bhi):
2025-07-01 05:46:31.107 bj = b[j]
2025-07-01 05:46:31.112 cruncher.set_seq2(bj)
2025-07-01 05:46:31.119 for i in range(alo, ahi):
2025-07-01 05:46:31.125 ai = a[i]
2025-07-01 05:46:31.130 if ai == bj:
2025-07-01 05:46:31.138 if eqi is None:
2025-07-01 05:46:31.146 eqi, eqj = i, j
2025-07-01 05:46:31.155 continue
2025-07-01 05:46:31.161 cruncher.set_seq1(ai)
2025-07-01 05:46:31.168 # computing similarity is expensive, so use the quick
2025-07-01 05:46:31.175 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:31.181 # compares by a factor of 3.
2025-07-01 05:46:31.186 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:31.192 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:31.198 # of the computation is cached by cruncher
2025-07-01 05:46:31.207 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:31.215 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:31.222 cruncher.ratio() > best_ratio:
2025-07-01 05:46:31.229 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:31.236 if best_ratio < cutoff:
2025-07-01 05:46:31.246 # no non-identical "pretty close" pair
2025-07-01 05:46:31.255 if eqi is None:
2025-07-01 05:46:31.264 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:31.277 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:31.288 return
2025-07-01 05:46:31.295 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:31.302 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:31.308 else:
2025-07-01 05:46:31.315 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:31.320 eqi = None
2025-07-01 05:46:31.326
2025-07-01 05:46:31.338 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:31.349 # identical
2025-07-01 05:46:31.357
2025-07-01 05:46:31.364 # pump out diffs from before the synch point
2025-07-01 05:46:31.370 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:31.376
2025-07-01 05:46:31.382 # do intraline marking on the synch pair
2025-07-01 05:46:31.387 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:31.393 if eqi is None:
2025-07-01 05:46:31.400 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:31.407 atags = btags = ""
2025-07-01 05:46:31.415 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:31.426 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:31.434 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:31.441 if tag == 'replace':
2025-07-01 05:46:31.447 atags += '^' * la
2025-07-01 05:46:31.454 btags += '^' * lb
2025-07-01 05:46:31.464 elif tag == 'delete':
2025-07-01 05:46:31.472 atags += '-' * la
2025-07-01 05:46:31.479 elif tag == 'insert':
2025-07-01 05:46:31.484 btags += '+' * lb
2025-07-01 05:46:31.490 elif tag == 'equal':
2025-07-01 05:46:31.494 atags += ' ' * la
2025-07-01 05:46:31.500 btags += ' ' * lb
2025-07-01 05:46:31.505 else:
2025-07-01 05:46:31.510 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:31.521 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:31.532 else:
2025-07-01 05:46:31.541 # the synch pair is identical
2025-07-01 05:46:31.548 yield ' ' + aelt
2025-07-01 05:46:31.554
2025-07-01 05:46:31.562 # pump out diffs from after the synch point
2025-07-01 05:46:31.574 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:31.585
2025-07-01 05:46:31.593 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:31.600 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:31.605
2025-07-01 05:46:31.610 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:31.618 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:31.626 alo = 168, ahi = 1101
2025-07-01 05:46:31.634 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:31.642 blo = 168, bhi = 1101
2025-07-01 05:46:31.653
2025-07-01 05:46:31.663 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:31.671 g = []
2025-07-01 05:46:31.677 if alo < ahi:
2025-07-01 05:46:31.682 if blo < bhi:
2025-07-01 05:46:31.687 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:31.693 else:
2025-07-01 05:46:31.699 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:31.705 elif blo < bhi:
2025-07-01 05:46:31.711 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:31.717
2025-07-01 05:46:31.724 > yield from g
2025-07-01 05:46:31.730
2025-07-01 05:46:31.740 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:31.753 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:31.764
2025-07-01 05:46:31.775 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:31.787 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:31.799 alo = 168, ahi = 1101
2025-07-01 05:46:31.810 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:31.818 blo = 168, bhi = 1101
2025-07-01 05:46:31.826
2025-07-01 05:46:31.836 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:31.845 r"""
2025-07-01 05:46:31.852 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:31.858 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:31.864 synch point, and intraline difference marking is done on the
2025-07-01 05:46:31.870 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:31.881
2025-07-01 05:46:31.889 Example:
2025-07-01 05:46:31.896
2025-07-01 05:46:31.903 >>> d = Differ()
2025-07-01 05:46:31.911 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:31.924 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:31.933 >>> print(''.join(results), end="")
2025-07-01 05:46:31.941 - abcDefghiJkl
2025-07-01 05:46:31.954 + abcdefGhijkl
2025-07-01 05:46:31.965 """
2025-07-01 05:46:31.969
2025-07-01 05:46:31.974 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:31.978 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:31.983 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:31.988 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:31.994 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:32.000
2025-07-01 05:46:32.007 # search for the pair that matches best without being identical
2025-07-01 05:46:32.015 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:32.021 # on junk -- unless we have to)
2025-07-01 05:46:32.027 for j in range(blo, bhi):
2025-07-01 05:46:32.033 bj = b[j]
2025-07-01 05:46:32.038 cruncher.set_seq2(bj)
2025-07-01 05:46:32.051 for i in range(alo, ahi):
2025-07-01 05:46:32.062 ai = a[i]
2025-07-01 05:46:32.073 if ai == bj:
2025-07-01 05:46:32.086 if eqi is None:
2025-07-01 05:46:32.095 eqi, eqj = i, j
2025-07-01 05:46:32.107 continue
2025-07-01 05:46:32.116 cruncher.set_seq1(ai)
2025-07-01 05:46:32.129 # computing similarity is expensive, so use the quick
2025-07-01 05:46:32.140 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:32.150 # compares by a factor of 3.
2025-07-01 05:46:32.156 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:32.163 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:32.170 # of the computation is cached by cruncher
2025-07-01 05:46:32.179 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:32.190 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:32.199 cruncher.ratio() > best_ratio:
2025-07-01 05:46:32.206 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:32.214 if best_ratio < cutoff:
2025-07-01 05:46:32.221 # no non-identical "pretty close" pair
2025-07-01 05:46:32.227 if eqi is None:
2025-07-01 05:46:32.235 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:32.243 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:32.254 return
2025-07-01 05:46:32.264 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:32.271 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:32.277 else:
2025-07-01 05:46:32.289 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:32.298 eqi = None
2025-07-01 05:46:32.308
2025-07-01 05:46:32.319 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:32.329 # identical
2025-07-01 05:46:32.341
2025-07-01 05:46:32.350 # pump out diffs from before the synch point
2025-07-01 05:46:32.360 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:32.371
2025-07-01 05:46:32.382 # do intraline marking on the synch pair
2025-07-01 05:46:32.390 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:32.397 if eqi is None:
2025-07-01 05:46:32.404 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:32.417 atags = btags = ""
2025-07-01 05:46:32.425 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:32.433 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:32.439 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:32.445 if tag == 'replace':
2025-07-01 05:46:32.452 atags += '^' * la
2025-07-01 05:46:32.458 btags += '^' * lb
2025-07-01 05:46:32.464 elif tag == 'delete':
2025-07-01 05:46:32.471 atags += '-' * la
2025-07-01 05:46:32.482 elif tag == 'insert':
2025-07-01 05:46:32.492 btags += '+' * lb
2025-07-01 05:46:32.501 elif tag == 'equal':
2025-07-01 05:46:32.513 atags += ' ' * la
2025-07-01 05:46:32.522 btags += ' ' * lb
2025-07-01 05:46:32.532 else:
2025-07-01 05:46:32.540 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:32.547 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:32.555 else:
2025-07-01 05:46:32.569 # the synch pair is identical
2025-07-01 05:46:32.578 yield ' ' + aelt
2025-07-01 05:46:32.585
2025-07-01 05:46:32.592 # pump out diffs from after the synch point
2025-07-01 05:46:32.600 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:32.610
2025-07-01 05:46:32.620 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:32.632 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:32.640
2025-07-01 05:46:32.650 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:32.664 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:32.674 alo = 169, ahi = 1101
2025-07-01 05:46:32.683 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:32.692 blo = 169, bhi = 1101
2025-07-01 05:46:32.699
2025-07-01 05:46:32.706 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:32.717 g = []
2025-07-01 05:46:32.726 if alo < ahi:
2025-07-01 05:46:32.734 if blo < bhi:
2025-07-01 05:46:32.740 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:32.746 else:
2025-07-01 05:46:32.753 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:32.760 elif blo < bhi:
2025-07-01 05:46:32.767 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:32.773
2025-07-01 05:46:32.782 > yield from g
2025-07-01 05:46:32.794
2025-07-01 05:46:32.806 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:32.820 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:32.831
2025-07-01 05:46:32.843 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:32.853 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:32.860 alo = 169, ahi = 1101
2025-07-01 05:46:32.868 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:32.875 blo = 169, bhi = 1101
2025-07-01 05:46:32.883
2025-07-01 05:46:32.892 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:32.898 r"""
2025-07-01 05:46:32.904 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:32.910 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:32.916 synch point, and intraline difference marking is done on the
2025-07-01 05:46:32.922 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:32.928
2025-07-01 05:46:32.935 Example:
2025-07-01 05:46:32.945
2025-07-01 05:46:32.958 >>> d = Differ()
2025-07-01 05:46:32.968 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:32.977 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:32.983 >>> print(''.join(results), end="")
2025-07-01 05:46:32.991 - abcDefghiJkl
2025-07-01 05:46:33.009 + abcdefGhijkl
2025-07-01 05:46:33.030 """
2025-07-01 05:46:33.041
2025-07-01 05:46:33.053 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:33.064 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:33.073 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:33.081 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:33.088 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:33.095
2025-07-01 05:46:33.108 # search for the pair that matches best without being identical
2025-07-01 05:46:33.121 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:33.131 # on junk -- unless we have to)
2025-07-01 05:46:33.140 for j in range(blo, bhi):
2025-07-01 05:46:33.147 bj = b[j]
2025-07-01 05:46:33.160 cruncher.set_seq2(bj)
2025-07-01 05:46:33.172 for i in range(alo, ahi):
2025-07-01 05:46:33.183 ai = a[i]
2025-07-01 05:46:33.191 if ai == bj:
2025-07-01 05:46:33.198 if eqi is None:
2025-07-01 05:46:33.212 eqi, eqj = i, j
2025-07-01 05:46:33.223 continue
2025-07-01 05:46:33.236 cruncher.set_seq1(ai)
2025-07-01 05:46:33.248 # computing similarity is expensive, so use the quick
2025-07-01 05:46:33.258 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:33.265 # compares by a factor of 3.
2025-07-01 05:46:33.278 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:33.290 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:33.302 # of the computation is cached by cruncher
2025-07-01 05:46:33.311 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:33.319 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:33.331 cruncher.ratio() > best_ratio:
2025-07-01 05:46:33.341 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:33.352 if best_ratio < cutoff:
2025-07-01 05:46:33.363 # no non-identical "pretty close" pair
2025-07-01 05:46:33.374 if eqi is None:
2025-07-01 05:46:33.385 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:33.393 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:33.400 return
2025-07-01 05:46:33.406 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:33.418 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:33.427 else:
2025-07-01 05:46:33.435 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:33.441 eqi = None
2025-07-01 05:46:33.447
2025-07-01 05:46:33.454 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:33.465 # identical
2025-07-01 05:46:33.475
2025-07-01 05:46:33.482 # pump out diffs from before the synch point
2025-07-01 05:46:33.488 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:33.492
2025-07-01 05:46:33.499 # do intraline marking on the synch pair
2025-07-01 05:46:33.510 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:33.518 if eqi is None:
2025-07-01 05:46:33.525 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:33.531 atags = btags = ""
2025-07-01 05:46:33.536 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:33.542 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:33.547 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:33.553 if tag == 'replace':
2025-07-01 05:46:33.559 atags += '^' * la
2025-07-01 05:46:33.566 btags += '^' * lb
2025-07-01 05:46:33.578 elif tag == 'delete':
2025-07-01 05:46:33.588 atags += '-' * la
2025-07-01 05:46:33.601 elif tag == 'insert':
2025-07-01 05:46:33.609 btags += '+' * lb
2025-07-01 05:46:33.621 elif tag == 'equal':
2025-07-01 05:46:33.632 atags += ' ' * la
2025-07-01 05:46:33.644 btags += ' ' * lb
2025-07-01 05:46:33.653 else:
2025-07-01 05:46:33.664 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:33.676 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:33.688 else:
2025-07-01 05:46:33.700 # the synch pair is identical
2025-07-01 05:46:33.709 yield ' ' + aelt
2025-07-01 05:46:33.717
2025-07-01 05:46:33.723 # pump out diffs from after the synch point
2025-07-01 05:46:33.729 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:33.733
2025-07-01 05:46:33.738 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:33.743 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:33.751
2025-07-01 05:46:33.757 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:33.763 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:33.769 alo = 170, ahi = 1101
2025-07-01 05:46:33.776 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:33.782 blo = 170, bhi = 1101
2025-07-01 05:46:33.791
2025-07-01 05:46:33.801 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:33.807 g = []
2025-07-01 05:46:33.813 if alo < ahi:
2025-07-01 05:46:33.819 if blo < bhi:
2025-07-01 05:46:33.826 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:33.836 else:
2025-07-01 05:46:33.848 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:33.855 elif blo < bhi:
2025-07-01 05:46:33.863 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:33.876
2025-07-01 05:46:33.888 > yield from g
2025-07-01 05:46:33.900
2025-07-01 05:46:33.909 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:33.922 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:33.932
2025-07-01 05:46:33.940 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:33.954 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:33.965 alo = 170, ahi = 1101
2025-07-01 05:46:33.978 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:33.990 blo = 170, bhi = 1101
2025-07-01 05:46:33.999
2025-07-01 05:46:34.011 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:34.020 r"""
2025-07-01 05:46:34.026 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:34.032 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:34.038 synch point, and intraline difference marking is done on the
2025-07-01 05:46:34.044 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:34.051
2025-07-01 05:46:34.058 Example:
2025-07-01 05:46:34.070
2025-07-01 05:46:34.078 >>> d = Differ()
2025-07-01 05:46:34.084 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:34.090 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:34.096 >>> print(''.join(results), end="")
2025-07-01 05:46:34.101 - abcDefghiJkl
2025-07-01 05:46:34.113 + abcdefGhijkl
2025-07-01 05:46:34.127 """
2025-07-01 05:46:34.136
2025-07-01 05:46:34.142 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:34.150 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:34.155 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:34.160 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:34.164 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:34.169
2025-07-01 05:46:34.174 # search for the pair that matches best without being identical
2025-07-01 05:46:34.178 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:34.183 # on junk -- unless we have to)
2025-07-01 05:46:34.188 for j in range(blo, bhi):
2025-07-01 05:46:34.195 bj = b[j]
2025-07-01 05:46:34.201 cruncher.set_seq2(bj)
2025-07-01 05:46:34.208 for i in range(alo, ahi):
2025-07-01 05:46:34.214 ai = a[i]
2025-07-01 05:46:34.220 if ai == bj:
2025-07-01 05:46:34.226 if eqi is None:
2025-07-01 05:46:34.232 eqi, eqj = i, j
2025-07-01 05:46:34.238 continue
2025-07-01 05:46:34.244 cruncher.set_seq1(ai)
2025-07-01 05:46:34.250 # computing similarity is expensive, so use the quick
2025-07-01 05:46:34.255 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:34.261 # compares by a factor of 3.
2025-07-01 05:46:34.267 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:34.273 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:34.279 # of the computation is cached by cruncher
2025-07-01 05:46:34.284 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:34.289 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:34.299 cruncher.ratio() > best_ratio:
2025-07-01 05:46:34.306 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:34.317 if best_ratio < cutoff:
2025-07-01 05:46:34.328 # no non-identical "pretty close" pair
2025-07-01 05:46:34.337 if eqi is None:
2025-07-01 05:46:34.345 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:34.352 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:34.362 return
2025-07-01 05:46:34.373 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:34.383 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:34.392 else:
2025-07-01 05:46:34.404 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:34.415 eqi = None
2025-07-01 05:46:34.423
2025-07-01 05:46:34.430 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:34.437 # identical
2025-07-01 05:46:34.445
2025-07-01 05:46:34.452 # pump out diffs from before the synch point
2025-07-01 05:46:34.459 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:34.465
2025-07-01 05:46:34.470 # do intraline marking on the synch pair
2025-07-01 05:46:34.481 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:34.490 if eqi is None:
2025-07-01 05:46:34.497 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:34.503 atags = btags = ""
2025-07-01 05:46:34.509 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:34.515 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:34.520 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:34.526 if tag == 'replace':
2025-07-01 05:46:34.536 atags += '^' * la
2025-07-01 05:46:34.548 btags += '^' * lb
2025-07-01 05:46:34.557 elif tag == 'delete':
2025-07-01 05:46:34.564 atags += '-' * la
2025-07-01 05:46:34.570 elif tag == 'insert':
2025-07-01 05:46:34.582 btags += '+' * lb
2025-07-01 05:46:34.594 elif tag == 'equal':
2025-07-01 05:46:34.604 atags += ' ' * la
2025-07-01 05:46:34.615 btags += ' ' * lb
2025-07-01 05:46:34.624 else:
2025-07-01 05:46:34.631 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:34.638 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:34.645 else:
2025-07-01 05:46:34.652 # the synch pair is identical
2025-07-01 05:46:34.661 yield ' ' + aelt
2025-07-01 05:46:34.673
2025-07-01 05:46:34.684 # pump out diffs from after the synch point
2025-07-01 05:46:34.693 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:34.700
2025-07-01 05:46:34.706 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:34.712 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:34.719
2025-07-01 05:46:34.726 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:34.735 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:34.748 alo = 171, ahi = 1101
2025-07-01 05:46:34.757 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:34.763 blo = 171, bhi = 1101
2025-07-01 05:46:34.769
2025-07-01 05:46:34.774 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:34.779 g = []
2025-07-01 05:46:34.784 if alo < ahi:
2025-07-01 05:46:34.793 if blo < bhi:
2025-07-01 05:46:34.805 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:34.818 else:
2025-07-01 05:46:34.827 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:34.835 elif blo < bhi:
2025-07-01 05:46:34.843 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:34.850
2025-07-01 05:46:34.863 > yield from g
2025-07-01 05:46:34.873
2025-07-01 05:46:34.880 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:34.886 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:34.894
2025-07-01 05:46:34.900 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:34.906 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:34.912 alo = 171, ahi = 1101
2025-07-01 05:46:34.919 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:34.927 blo = 171, bhi = 1101
2025-07-01 05:46:34.932
2025-07-01 05:46:34.937 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:34.942 r"""
2025-07-01 05:46:34.946 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:34.952 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:34.958 synch point, and intraline difference marking is done on the
2025-07-01 05:46:34.963 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:34.969
2025-07-01 05:46:34.975 Example:
2025-07-01 05:46:34.980
2025-07-01 05:46:34.986 >>> d = Differ()
2025-07-01 05:46:34.997 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:35.008 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:35.018 >>> print(''.join(results), end="")
2025-07-01 05:46:35.024 - abcDefghiJkl
2025-07-01 05:46:35.037 + abcdefGhijkl
2025-07-01 05:46:35.056 """
2025-07-01 05:46:35.067
2025-07-01 05:46:35.075 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:35.087 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:35.092 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:35.098 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:35.103 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:35.108
2025-07-01 05:46:35.112 # search for the pair that matches best without being identical
2025-07-01 05:46:35.117 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:35.122 # on junk -- unless we have to)
2025-07-01 05:46:35.127 for j in range(blo, bhi):
2025-07-01 05:46:35.133 bj = b[j]
2025-07-01 05:46:35.138 cruncher.set_seq2(bj)
2025-07-01 05:46:35.145 for i in range(alo, ahi):
2025-07-01 05:46:35.153 ai = a[i]
2025-07-01 05:46:35.159 if ai == bj:
2025-07-01 05:46:35.165 if eqi is None:
2025-07-01 05:46:35.169 eqi, eqj = i, j
2025-07-01 05:46:35.174 continue
2025-07-01 05:46:35.179 cruncher.set_seq1(ai)
2025-07-01 05:46:35.184 # computing similarity is expensive, so use the quick
2025-07-01 05:46:35.189 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:35.200 # compares by a factor of 3.
2025-07-01 05:46:35.209 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:35.217 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:35.223 # of the computation is cached by cruncher
2025-07-01 05:46:35.228 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:35.233 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:35.238 cruncher.ratio() > best_ratio:
2025-07-01 05:46:35.244 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:35.250 if best_ratio < cutoff:
2025-07-01 05:46:35.257 # no non-identical "pretty close" pair
2025-07-01 05:46:35.264 if eqi is None:
2025-07-01 05:46:35.271 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:35.277 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:35.284 return
2025-07-01 05:46:35.291 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:35.299 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:35.309 else:
2025-07-01 05:46:35.317 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:35.323 eqi = None
2025-07-01 05:46:35.329
2025-07-01 05:46:35.334 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:35.339 # identical
2025-07-01 05:46:35.343
2025-07-01 05:46:35.348 # pump out diffs from before the synch point
2025-07-01 05:46:35.354 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:35.359
2025-07-01 05:46:35.365 # do intraline marking on the synch pair
2025-07-01 05:46:35.371 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:35.378 if eqi is None:
2025-07-01 05:46:35.387 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:35.395 atags = btags = ""
2025-07-01 05:46:35.401 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:35.407 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:35.413 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:35.419 if tag == 'replace':
2025-07-01 05:46:35.426 atags += '^' * la
2025-07-01 05:46:35.436 btags += '^' * lb
2025-07-01 05:46:35.447 elif tag == 'delete':
2025-07-01 05:46:35.456 atags += '-' * la
2025-07-01 05:46:35.463 elif tag == 'insert':
2025-07-01 05:46:35.469 btags += '+' * lb
2025-07-01 05:46:35.477 elif tag == 'equal':
2025-07-01 05:46:35.487 atags += ' ' * la
2025-07-01 05:46:35.495 btags += ' ' * lb
2025-07-01 05:46:35.501 else:
2025-07-01 05:46:35.507 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:35.512 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:35.516 else:
2025-07-01 05:46:35.521 # the synch pair is identical
2025-07-01 05:46:35.527 yield ' ' + aelt
2025-07-01 05:46:35.532
2025-07-01 05:46:35.538 # pump out diffs from after the synch point
2025-07-01 05:46:35.546 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:35.553
2025-07-01 05:46:35.559 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:35.565 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:35.570
2025-07-01 05:46:35.575 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:35.581 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:35.587 alo = 172, ahi = 1101
2025-07-01 05:46:35.593 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:35.600 blo = 172, bhi = 1101
2025-07-01 05:46:35.606
2025-07-01 05:46:35.612 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:35.617 g = []
2025-07-01 05:46:35.622 if alo < ahi:
2025-07-01 05:46:35.626 if blo < bhi:
2025-07-01 05:46:35.632 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:35.636 else:
2025-07-01 05:46:35.641 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:35.645 elif blo < bhi:
2025-07-01 05:46:35.650 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:35.655
2025-07-01 05:46:35.660 > yield from g
2025-07-01 05:46:35.664
2025-07-01 05:46:35.669 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:35.674 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:35.679
2025-07-01 05:46:35.684 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:35.690 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:35.695 alo = 172, ahi = 1101
2025-07-01 05:46:35.702 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:35.710 blo = 172, bhi = 1101
2025-07-01 05:46:35.717
2025-07-01 05:46:35.723 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:35.728 r"""
2025-07-01 05:46:35.734 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:35.739 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:35.745 synch point, and intraline difference marking is done on the
2025-07-01 05:46:35.752 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:35.758
2025-07-01 05:46:35.764 Example:
2025-07-01 05:46:35.771
2025-07-01 05:46:35.781 >>> d = Differ()
2025-07-01 05:46:35.790 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:35.797 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:35.803 >>> print(''.join(results), end="")
2025-07-01 05:46:35.809 - abcDefghiJkl
2025-07-01 05:46:35.818 + abcdefGhijkl
2025-07-01 05:46:35.827 """
2025-07-01 05:46:35.831
2025-07-01 05:46:35.838 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:35.843 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:35.849 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:35.854 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:35.860 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:35.867
2025-07-01 05:46:35.873 # search for the pair that matches best without being identical
2025-07-01 05:46:35.880 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:35.887 # on junk -- unless we have to)
2025-07-01 05:46:35.897 for j in range(blo, bhi):
2025-07-01 05:46:35.909 bj = b[j]
2025-07-01 05:46:35.917 cruncher.set_seq2(bj)
2025-07-01 05:46:35.925 for i in range(alo, ahi):
2025-07-01 05:46:35.937 ai = a[i]
2025-07-01 05:46:35.947 if ai == bj:
2025-07-01 05:46:35.953 if eqi is None:
2025-07-01 05:46:35.960 eqi, eqj = i, j
2025-07-01 05:46:35.966 continue
2025-07-01 05:46:35.973 cruncher.set_seq1(ai)
2025-07-01 05:46:35.980 # computing similarity is expensive, so use the quick
2025-07-01 05:46:35.987 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:35.994 # compares by a factor of 3.
2025-07-01 05:46:36.005 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:36.014 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:36.022 # of the computation is cached by cruncher
2025-07-01 05:46:36.029 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:36.036 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:36.043 cruncher.ratio() > best_ratio:
2025-07-01 05:46:36.049 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:36.055 if best_ratio < cutoff:
2025-07-01 05:46:36.060 # no non-identical "pretty close" pair
2025-07-01 05:46:36.064 if eqi is None:
2025-07-01 05:46:36.069 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:36.073 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:36.078 return
2025-07-01 05:46:36.084 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:36.090 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:36.095 else:
2025-07-01 05:46:36.104 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:36.115 eqi = None
2025-07-01 05:46:36.123
2025-07-01 05:46:36.131 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:36.138 # identical
2025-07-01 05:46:36.144
2025-07-01 05:46:36.149 # pump out diffs from before the synch point
2025-07-01 05:46:36.155 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:36.161
2025-07-01 05:46:36.166 # do intraline marking on the synch pair
2025-07-01 05:46:36.172 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:36.179 if eqi is None:
2025-07-01 05:46:36.187 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:36.197 atags = btags = ""
2025-07-01 05:46:36.209 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:36.221 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:36.229 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:36.236 if tag == 'replace':
2025-07-01 05:46:36.243 atags += '^' * la
2025-07-01 05:46:36.250 btags += '^' * lb
2025-07-01 05:46:36.255 elif tag == 'delete':
2025-07-01 05:46:36.263 atags += '-' * la
2025-07-01 05:46:36.277 elif tag == 'insert':
2025-07-01 05:46:36.286 btags += '+' * lb
2025-07-01 05:46:36.294 elif tag == 'equal':
2025-07-01 05:46:36.302 atags += ' ' * la
2025-07-01 05:46:36.309 btags += ' ' * lb
2025-07-01 05:46:36.316 else:
2025-07-01 05:46:36.323 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:36.329 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:36.336 else:
2025-07-01 05:46:36.346 # the synch pair is identical
2025-07-01 05:46:36.356 yield ' ' + aelt
2025-07-01 05:46:36.363
2025-07-01 05:46:36.370 # pump out diffs from after the synch point
2025-07-01 05:46:36.377 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:36.391
2025-07-01 05:46:36.398 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:36.406 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:36.417
2025-07-01 05:46:36.424 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:36.438 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:36.450 alo = 173, ahi = 1101
2025-07-01 05:46:36.460 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:36.467 blo = 173, bhi = 1101
2025-07-01 05:46:36.474
2025-07-01 05:46:36.480 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:36.486 g = []
2025-07-01 05:46:36.491 if alo < ahi:
2025-07-01 05:46:36.499 if blo < bhi:
2025-07-01 05:46:36.510 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:36.517 else:
2025-07-01 05:46:36.527 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:36.538 elif blo < bhi:
2025-07-01 05:46:36.551 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:36.564
2025-07-01 05:46:36.573 > yield from g
2025-07-01 05:46:36.586
2025-07-01 05:46:36.598 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:36.611 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:36.620
2025-07-01 05:46:36.628 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:36.637 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:36.647 alo = 173, ahi = 1101
2025-07-01 05:46:36.660 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:36.670 blo = 173, bhi = 1101
2025-07-01 05:46:36.682
2025-07-01 05:46:36.692 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:36.705 r"""
2025-07-01 05:46:36.717 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:36.731 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:36.743 synch point, and intraline difference marking is done on the
2025-07-01 05:46:36.752 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:36.760
2025-07-01 05:46:36.767 Example:
2025-07-01 05:46:36.773
2025-07-01 05:46:36.783 >>> d = Differ()
2025-07-01 05:46:36.793 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:36.801 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:36.808 >>> print(''.join(results), end="")
2025-07-01 05:46:36.815 - abcDefghiJkl
2025-07-01 05:46:36.826 + abcdefGhijkl
2025-07-01 05:46:36.838 """
2025-07-01 05:46:36.844
2025-07-01 05:46:36.850 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:36.861 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:36.870 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:36.876 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:36.882 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:36.888
2025-07-01 05:46:36.898 # search for the pair that matches best without being identical
2025-07-01 05:46:36.908 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:36.919 # on junk -- unless we have to)
2025-07-01 05:46:36.929 for j in range(blo, bhi):
2025-07-01 05:46:36.937 bj = b[j]
2025-07-01 05:46:36.944 cruncher.set_seq2(bj)
2025-07-01 05:46:36.951 for i in range(alo, ahi):
2025-07-01 05:46:36.959 ai = a[i]
2025-07-01 05:46:36.971 if ai == bj:
2025-07-01 05:46:36.985 if eqi is None:
2025-07-01 05:46:36.994 eqi, eqj = i, j
2025-07-01 05:46:37.003 continue
2025-07-01 05:46:37.010 cruncher.set_seq1(ai)
2025-07-01 05:46:37.019 # computing similarity is expensive, so use the quick
2025-07-01 05:46:37.031 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:37.039 # compares by a factor of 3.
2025-07-01 05:46:37.052 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:37.065 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:37.075 # of the computation is cached by cruncher
2025-07-01 05:46:37.082 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:37.088 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:37.094 cruncher.ratio() > best_ratio:
2025-07-01 05:46:37.098 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:37.103 if best_ratio < cutoff:
2025-07-01 05:46:37.108 # no non-identical "pretty close" pair
2025-07-01 05:46:37.113 if eqi is None:
2025-07-01 05:46:37.119 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:37.125 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:37.133 return
2025-07-01 05:46:37.144 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:37.156 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:37.166 else:
2025-07-01 05:46:37.173 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:37.182 eqi = None
2025-07-01 05:46:37.194
2025-07-01 05:46:37.204 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:37.211 # identical
2025-07-01 05:46:37.218
2025-07-01 05:46:37.224 # pump out diffs from before the synch point
2025-07-01 05:46:37.234 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:37.247
2025-07-01 05:46:37.258 # do intraline marking on the synch pair
2025-07-01 05:46:37.267 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:37.276 if eqi is None:
2025-07-01 05:46:37.286 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:37.296 atags = btags = ""
2025-07-01 05:46:37.305 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:37.312 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:37.318 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:37.324 if tag == 'replace':
2025-07-01 05:46:37.329 atags += '^' * la
2025-07-01 05:46:37.335 btags += '^' * lb
2025-07-01 05:46:37.343 elif tag == 'delete':
2025-07-01 05:46:37.354 atags += '-' * la
2025-07-01 05:46:37.366 elif tag == 'insert':
2025-07-01 05:46:37.374 btags += '+' * lb
2025-07-01 05:46:37.381 elif tag == 'equal':
2025-07-01 05:46:37.386 atags += ' ' * la
2025-07-01 05:46:37.391 btags += ' ' * lb
2025-07-01 05:46:37.395 else:
2025-07-01 05:46:37.400 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:37.406 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:37.411 else:
2025-07-01 05:46:37.418 # the synch pair is identical
2025-07-01 05:46:37.423 yield ' ' + aelt
2025-07-01 05:46:37.427
2025-07-01 05:46:37.432 # pump out diffs from after the synch point
2025-07-01 05:46:37.436 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:37.448
2025-07-01 05:46:37.460 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:37.470 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:37.480
2025-07-01 05:46:37.489 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:37.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:37.509 alo = 174, ahi = 1101
2025-07-01 05:46:37.520 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:37.529 blo = 174, bhi = 1101
2025-07-01 05:46:37.540
2025-07-01 05:46:37.550 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:37.561 g = []
2025-07-01 05:46:37.572 if alo < ahi:
2025-07-01 05:46:37.580 if blo < bhi:
2025-07-01 05:46:37.587 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:37.595 else:
2025-07-01 05:46:37.606 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:37.614 elif blo < bhi:
2025-07-01 05:46:37.623 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:37.632
2025-07-01 05:46:37.643 > yield from g
2025-07-01 05:46:37.651
2025-07-01 05:46:37.658 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:37.666 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:37.672
2025-07-01 05:46:37.678 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:37.687 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:37.695 alo = 174, ahi = 1101
2025-07-01 05:46:37.703 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:37.710 blo = 174, bhi = 1101
2025-07-01 05:46:37.717
2025-07-01 05:46:37.725 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:37.731 r"""
2025-07-01 05:46:37.737 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:37.748 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:37.758 synch point, and intraline difference marking is done on the
2025-07-01 05:46:37.766 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:37.775
2025-07-01 05:46:37.782 Example:
2025-07-01 05:46:37.789
2025-07-01 05:46:37.795 >>> d = Differ()
2025-07-01 05:46:37.801 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:37.807 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:37.813 >>> print(''.join(results), end="")
2025-07-01 05:46:37.819 - abcDefghiJkl
2025-07-01 05:46:37.830 + abcdefGhijkl
2025-07-01 05:46:37.849 """
2025-07-01 05:46:37.856
2025-07-01 05:46:37.863 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:37.869 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:37.875 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:37.880 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:37.886 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:37.891
2025-07-01 05:46:37.900 # search for the pair that matches best without being identical
2025-07-01 05:46:37.909 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:37.917 # on junk -- unless we have to)
2025-07-01 05:46:37.923 for j in range(blo, bhi):
2025-07-01 05:46:37.929 bj = b[j]
2025-07-01 05:46:37.935 cruncher.set_seq2(bj)
2025-07-01 05:46:37.947 for i in range(alo, ahi):
2025-07-01 05:46:37.956 ai = a[i]
2025-07-01 05:46:37.964 if ai == bj:
2025-07-01 05:46:37.970 if eqi is None:
2025-07-01 05:46:37.981 eqi, eqj = i, j
2025-07-01 05:46:37.991 continue
2025-07-01 05:46:37.998 cruncher.set_seq1(ai)
2025-07-01 05:46:38.006 # computing similarity is expensive, so use the quick
2025-07-01 05:46:38.017 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:38.028 # compares by a factor of 3.
2025-07-01 05:46:38.036 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:38.043 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:38.049 # of the computation is cached by cruncher
2025-07-01 05:46:38.055 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:38.061 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:38.067 cruncher.ratio() > best_ratio:
2025-07-01 05:46:38.080 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:38.089 if best_ratio < cutoff:
2025-07-01 05:46:38.100 # no non-identical "pretty close" pair
2025-07-01 05:46:38.112 if eqi is None:
2025-07-01 05:46:38.123 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:38.131 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:38.139 return
2025-07-01 05:46:38.145 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:38.151 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:38.157 else:
2025-07-01 05:46:38.162 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:38.168 eqi = None
2025-07-01 05:46:38.173
2025-07-01 05:46:38.179 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:38.185 # identical
2025-07-01 05:46:38.190
2025-07-01 05:46:38.201 # pump out diffs from before the synch point
2025-07-01 05:46:38.211 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:38.219
2025-07-01 05:46:38.229 # do intraline marking on the synch pair
2025-07-01 05:46:38.241 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:38.252 if eqi is None:
2025-07-01 05:46:38.261 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:38.272 atags = btags = ""
2025-07-01 05:46:38.283 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:38.293 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:38.305 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:38.315 if tag == 'replace':
2025-07-01 05:46:38.328 atags += '^' * la
2025-07-01 05:46:38.339 btags += '^' * lb
2025-07-01 05:46:38.349 elif tag == 'delete':
2025-07-01 05:46:38.357 atags += '-' * la
2025-07-01 05:46:38.365 elif tag == 'insert':
2025-07-01 05:46:38.371 btags += '+' * lb
2025-07-01 05:46:38.378 elif tag == 'equal':
2025-07-01 05:46:38.383 atags += ' ' * la
2025-07-01 05:46:38.390 btags += ' ' * lb
2025-07-01 05:46:38.398 else:
2025-07-01 05:46:38.405 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:38.411 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:38.417 else:
2025-07-01 05:46:38.423 # the synch pair is identical
2025-07-01 05:46:38.429 yield ' ' + aelt
2025-07-01 05:46:38.435
2025-07-01 05:46:38.440 # pump out diffs from after the synch point
2025-07-01 05:46:38.445 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:38.450
2025-07-01 05:46:38.455 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:38.461 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:38.466
2025-07-01 05:46:38.471 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:38.477 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:38.482 alo = 175, ahi = 1101
2025-07-01 05:46:38.487 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:38.494 blo = 175, bhi = 1101
2025-07-01 05:46:38.498
2025-07-01 05:46:38.503 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:38.508 g = []
2025-07-01 05:46:38.514 if alo < ahi:
2025-07-01 05:46:38.520 if blo < bhi:
2025-07-01 05:46:38.528 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:38.540 else:
2025-07-01 05:46:38.549 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:38.557 elif blo < bhi:
2025-07-01 05:46:38.563 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:38.569
2025-07-01 05:46:38.574 > yield from g
2025-07-01 05:46:38.579
2025-07-01 05:46:38.585 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:38.590 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:38.595
2025-07-01 05:46:38.600 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:38.605 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:38.610 alo = 175, ahi = 1101
2025-07-01 05:46:38.616 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:38.622 blo = 175, bhi = 1101
2025-07-01 05:46:38.627
2025-07-01 05:46:38.632 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:38.637 r"""
2025-07-01 05:46:38.642 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:38.648 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:38.652 synch point, and intraline difference marking is done on the
2025-07-01 05:46:38.657 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:38.662
2025-07-01 05:46:38.667 Example:
2025-07-01 05:46:38.672
2025-07-01 05:46:38.677 >>> d = Differ()
2025-07-01 05:46:38.683 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:38.688 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:38.694 >>> print(''.join(results), end="")
2025-07-01 05:46:38.699 - abcDefghiJkl
2025-07-01 05:46:38.718 + abcdefGhijkl
2025-07-01 05:46:38.734 """
2025-07-01 05:46:38.743
2025-07-01 05:46:38.751 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:38.758 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:38.763 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:38.768 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:38.773 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:38.777
2025-07-01 05:46:38.783 # search for the pair that matches best without being identical
2025-07-01 05:46:38.789 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:38.794 # on junk -- unless we have to)
2025-07-01 05:46:38.798 for j in range(blo, bhi):
2025-07-01 05:46:38.803 bj = b[j]
2025-07-01 05:46:38.808 cruncher.set_seq2(bj)
2025-07-01 05:46:38.815 for i in range(alo, ahi):
2025-07-01 05:46:38.820 ai = a[i]
2025-07-01 05:46:38.826 if ai == bj:
2025-07-01 05:46:38.831 if eqi is None:
2025-07-01 05:46:38.836 eqi, eqj = i, j
2025-07-01 05:46:38.841 continue
2025-07-01 05:46:38.846 cruncher.set_seq1(ai)
2025-07-01 05:46:38.851 # computing similarity is expensive, so use the quick
2025-07-01 05:46:38.855 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:38.860 # compares by a factor of 3.
2025-07-01 05:46:38.865 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:38.870 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:38.875 # of the computation is cached by cruncher
2025-07-01 05:46:38.880 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:38.886 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:38.895 cruncher.ratio() > best_ratio:
2025-07-01 05:46:38.904 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:38.910 if best_ratio < cutoff:
2025-07-01 05:46:38.919 # no non-identical "pretty close" pair
2025-07-01 05:46:38.930 if eqi is None:
2025-07-01 05:46:38.941 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:38.953 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:38.963 return
2025-07-01 05:46:38.978 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:38.987 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:38.995 else:
2025-07-01 05:46:39.003 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:39.011 eqi = None
2025-07-01 05:46:39.018
2025-07-01 05:46:39.027 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:39.035 # identical
2025-07-01 05:46:39.042
2025-07-01 05:46:39.049 # pump out diffs from before the synch point
2025-07-01 05:46:39.058 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:39.070
2025-07-01 05:46:39.080 # do intraline marking on the synch pair
2025-07-01 05:46:39.088 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:39.101 if eqi is None:
2025-07-01 05:46:39.110 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:39.118 atags = btags = ""
2025-07-01 05:46:39.126 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:39.134 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:39.143 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:39.151 if tag == 'replace':
2025-07-01 05:46:39.160 atags += '^' * la
2025-07-01 05:46:39.168 btags += '^' * lb
2025-07-01 05:46:39.183 elif tag == 'delete':
2025-07-01 05:46:39.194 atags += '-' * la
2025-07-01 05:46:39.205 elif tag == 'insert':
2025-07-01 05:46:39.214 btags += '+' * lb
2025-07-01 05:46:39.221 elif tag == 'equal':
2025-07-01 05:46:39.229 atags += ' ' * la
2025-07-01 05:46:39.237 btags += ' ' * lb
2025-07-01 05:46:39.246 else:
2025-07-01 05:46:39.255 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:39.263 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:39.271 else:
2025-07-01 05:46:39.279 # the synch pair is identical
2025-07-01 05:46:39.288 yield ' ' + aelt
2025-07-01 05:46:39.296
2025-07-01 05:46:39.305 # pump out diffs from after the synch point
2025-07-01 05:46:39.318 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:39.329
2025-07-01 05:46:39.336 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:39.343 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:39.349
2025-07-01 05:46:39.354 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:39.361 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:39.366 alo = 176, ahi = 1101
2025-07-01 05:46:39.372 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:39.378 blo = 176, bhi = 1101
2025-07-01 05:46:39.383
2025-07-01 05:46:39.388 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:39.394 g = []
2025-07-01 05:46:39.399 if alo < ahi:
2025-07-01 05:46:39.406 if blo < bhi:
2025-07-01 05:46:39.414 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:39.420 else:
2025-07-01 05:46:39.427 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:39.433 elif blo < bhi:
2025-07-01 05:46:39.440 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:39.447
2025-07-01 05:46:39.453 > yield from g
2025-07-01 05:46:39.460
2025-07-01 05:46:39.467 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:39.473 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:39.481
2025-07-01 05:46:39.490 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:39.497 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:39.504 alo = 176, ahi = 1101
2025-07-01 05:46:39.510 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:39.516 blo = 176, bhi = 1101
2025-07-01 05:46:39.521
2025-07-01 05:46:39.527 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:39.532 r"""
2025-07-01 05:46:39.539 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:39.546 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:39.554 synch point, and intraline difference marking is done on the
2025-07-01 05:46:39.562 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:39.569
2025-07-01 05:46:39.575 Example:
2025-07-01 05:46:39.584
2025-07-01 05:46:39.592 >>> d = Differ()
2025-07-01 05:46:39.599 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:39.606 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:39.613 >>> print(''.join(results), end="")
2025-07-01 05:46:39.620 - abcDefghiJkl
2025-07-01 05:46:39.634 + abcdefGhijkl
2025-07-01 05:46:39.648 """
2025-07-01 05:46:39.656
2025-07-01 05:46:39.666 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:39.674 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:39.680 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:39.687 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:39.694 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:39.701
2025-07-01 05:46:39.707 # search for the pair that matches best without being identical
2025-07-01 05:46:39.714 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:39.723 # on junk -- unless we have to)
2025-07-01 05:46:39.734 for j in range(blo, bhi):
2025-07-01 05:46:39.746 bj = b[j]
2025-07-01 05:46:39.755 cruncher.set_seq2(bj)
2025-07-01 05:46:39.761 for i in range(alo, ahi):
2025-07-01 05:46:39.767 ai = a[i]
2025-07-01 05:46:39.773 if ai == bj:
2025-07-01 05:46:39.784 if eqi is None:
2025-07-01 05:46:39.795 eqi, eqj = i, j
2025-07-01 05:46:39.803 continue
2025-07-01 05:46:39.810 cruncher.set_seq1(ai)
2025-07-01 05:46:39.818 # computing similarity is expensive, so use the quick
2025-07-01 05:46:39.825 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:39.832 # compares by a factor of 3.
2025-07-01 05:46:39.839 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:39.846 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:39.856 # of the computation is cached by cruncher
2025-07-01 05:46:39.865 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:39.882 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:39.889 cruncher.ratio() > best_ratio:
2025-07-01 05:46:39.897 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:39.905 if best_ratio < cutoff:
2025-07-01 05:46:39.912 # no non-identical "pretty close" pair
2025-07-01 05:46:39.919 if eqi is None:
2025-07-01 05:46:39.926 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:39.932 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:39.939 return
2025-07-01 05:46:39.944 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:39.951 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:39.956 else:
2025-07-01 05:46:39.962 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:39.967 eqi = None
2025-07-01 05:46:39.972
2025-07-01 05:46:39.979 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:39.986 # identical
2025-07-01 05:46:39.993
2025-07-01 05:46:40.004 # pump out diffs from before the synch point
2025-07-01 05:46:40.014 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:40.020
2025-07-01 05:46:40.027 # do intraline marking on the synch pair
2025-07-01 05:46:40.033 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:40.040 if eqi is None:
2025-07-01 05:46:40.048 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:40.054 atags = btags = ""
2025-07-01 05:46:40.062 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:40.069 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:40.075 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:40.082 if tag == 'replace':
2025-07-01 05:46:40.090 atags += '^' * la
2025-07-01 05:46:40.096 btags += '^' * lb
2025-07-01 05:46:40.102 elif tag == 'delete':
2025-07-01 05:46:40.108 atags += '-' * la
2025-07-01 05:46:40.113 elif tag == 'insert':
2025-07-01 05:46:40.119 btags += '+' * lb
2025-07-01 05:46:40.126 elif tag == 'equal':
2025-07-01 05:46:40.132 atags += ' ' * la
2025-07-01 05:46:40.143 btags += ' ' * lb
2025-07-01 05:46:40.153 else:
2025-07-01 05:46:40.161 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:40.168 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:40.174 else:
2025-07-01 05:46:40.180 # the synch pair is identical
2025-07-01 05:46:40.192 yield ' ' + aelt
2025-07-01 05:46:40.204
2025-07-01 05:46:40.214 # pump out diffs from after the synch point
2025-07-01 05:46:40.223 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:40.230
2025-07-01 05:46:40.240 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:40.250 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:40.258
2025-07-01 05:46:40.268 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:40.277 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:40.284 alo = 177, ahi = 1101
2025-07-01 05:46:40.295 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:40.307 blo = 177, bhi = 1101
2025-07-01 05:46:40.322
2025-07-01 05:46:40.333 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:40.345 g = []
2025-07-01 05:46:40.358 if alo < ahi:
2025-07-01 05:46:40.369 if blo < bhi:
2025-07-01 05:46:40.378 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:40.387 else:
2025-07-01 05:46:40.400 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:40.414 elif blo < bhi:
2025-07-01 05:46:40.425 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:40.435
2025-07-01 05:46:40.448 > yield from g
2025-07-01 05:46:40.458
2025-07-01 05:46:40.470 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:40.481 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:40.491
2025-07-01 05:46:40.500 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:40.508 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:40.515 alo = 177, ahi = 1101
2025-07-01 05:46:40.522 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:40.533 blo = 177, bhi = 1101
2025-07-01 05:46:40.542
2025-07-01 05:46:40.551 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:40.559 r"""
2025-07-01 05:46:40.570 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:40.579 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:40.588 synch point, and intraline difference marking is done on the
2025-07-01 05:46:40.598 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:40.609
2025-07-01 05:46:40.622 Example:
2025-07-01 05:46:40.633
2025-07-01 05:46:40.640 >>> d = Differ()
2025-07-01 05:46:40.647 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:40.659 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:40.670 >>> print(''.join(results), end="")
2025-07-01 05:46:40.680 - abcDefghiJkl
2025-07-01 05:46:40.704 + abcdefGhijkl
2025-07-01 05:46:40.726 """
2025-07-01 05:46:40.737
2025-07-01 05:46:40.746 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:40.755 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:40.762 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:40.768 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:40.774 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:40.780
2025-07-01 05:46:40.787 # search for the pair that matches best without being identical
2025-07-01 05:46:40.793 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:40.802 # on junk -- unless we have to)
2025-07-01 05:46:40.815 for j in range(blo, bhi):
2025-07-01 05:46:40.824 bj = b[j]
2025-07-01 05:46:40.833 cruncher.set_seq2(bj)
2025-07-01 05:46:40.841 for i in range(alo, ahi):
2025-07-01 05:46:40.847 ai = a[i]
2025-07-01 05:46:40.854 if ai == bj:
2025-07-01 05:46:40.867 if eqi is None:
2025-07-01 05:46:40.877 eqi, eqj = i, j
2025-07-01 05:46:40.888 continue
2025-07-01 05:46:40.896 cruncher.set_seq1(ai)
2025-07-01 05:46:40.907 # computing similarity is expensive, so use the quick
2025-07-01 05:46:40.918 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:40.928 # compares by a factor of 3.
2025-07-01 05:46:40.936 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:40.942 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:40.947 # of the computation is cached by cruncher
2025-07-01 05:46:40.952 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:40.957 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:40.963 cruncher.ratio() > best_ratio:
2025-07-01 05:46:40.975 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:40.983 if best_ratio < cutoff:
2025-07-01 05:46:40.992 # no non-identical "pretty close" pair
2025-07-01 05:46:41.000 if eqi is None:
2025-07-01 05:46:41.008 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:41.014 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:41.026 return
2025-07-01 05:46:41.039 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:41.050 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:41.062 else:
2025-07-01 05:46:41.074 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:41.080 eqi = None
2025-07-01 05:46:41.085
2025-07-01 05:46:41.090 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:41.096 # identical
2025-07-01 05:46:41.102
2025-07-01 05:46:41.108 # pump out diffs from before the synch point
2025-07-01 05:46:41.114 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:41.120
2025-07-01 05:46:41.127 # do intraline marking on the synch pair
2025-07-01 05:46:41.138 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:41.150 if eqi is None:
2025-07-01 05:46:41.160 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:41.172 atags = btags = ""
2025-07-01 05:46:41.182 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:41.193 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:41.200 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:41.213 if tag == 'replace':
2025-07-01 05:46:41.224 atags += '^' * la
2025-07-01 05:46:41.235 btags += '^' * lb
2025-07-01 05:46:41.243 elif tag == 'delete':
2025-07-01 05:46:41.251 atags += '-' * la
2025-07-01 05:46:41.259 elif tag == 'insert':
2025-07-01 05:46:41.270 btags += '+' * lb
2025-07-01 05:46:41.279 elif tag == 'equal':
2025-07-01 05:46:41.289 atags += ' ' * la
2025-07-01 05:46:41.297 btags += ' ' * lb
2025-07-01 05:46:41.304 else:
2025-07-01 05:46:41.311 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:41.318 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:41.329 else:
2025-07-01 05:46:41.341 # the synch pair is identical
2025-07-01 05:46:41.349 yield ' ' + aelt
2025-07-01 05:46:41.356
2025-07-01 05:46:41.362 # pump out diffs from after the synch point
2025-07-01 05:46:41.369 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:41.380
2025-07-01 05:46:41.389 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:41.396 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:41.402
2025-07-01 05:46:41.407 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:41.413 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:41.418 alo = 180, ahi = 1101
2025-07-01 05:46:41.426 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:41.437 blo = 180, bhi = 1101
2025-07-01 05:46:41.446
2025-07-01 05:46:41.457 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:41.468 g = []
2025-07-01 05:46:41.480 if alo < ahi:
2025-07-01 05:46:41.488 if blo < bhi:
2025-07-01 05:46:41.497 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:41.504 else:
2025-07-01 05:46:41.511 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:41.522 elif blo < bhi:
2025-07-01 05:46:41.533 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:41.543
2025-07-01 05:46:41.552 > yield from g
2025-07-01 05:46:41.560
2025-07-01 05:46:41.566 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:41.577 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:41.586
2025-07-01 05:46:41.598 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:41.611 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:41.620 alo = 180, ahi = 1101
2025-07-01 05:46:41.633 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:41.642 blo = 180, bhi = 1101
2025-07-01 05:46:41.652
2025-07-01 05:46:41.663 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:41.673 r"""
2025-07-01 05:46:41.683 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:41.691 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:41.703 synch point, and intraline difference marking is done on the
2025-07-01 05:46:41.712 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:41.719
2025-07-01 05:46:41.732 Example:
2025-07-01 05:46:41.744
2025-07-01 05:46:41.756 >>> d = Differ()
2025-07-01 05:46:41.770 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:41.784 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:41.792 >>> print(''.join(results), end="")
2025-07-01 05:46:41.799 - abcDefghiJkl
2025-07-01 05:46:41.821 + abcdefGhijkl
2025-07-01 05:46:41.836 """
2025-07-01 05:46:41.842
2025-07-01 05:46:41.848 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:41.854 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:41.860 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:41.866 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:41.875 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:41.885
2025-07-01 05:46:41.896 # search for the pair that matches best without being identical
2025-07-01 05:46:41.908 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:41.921 # on junk -- unless we have to)
2025-07-01 05:46:41.931 for j in range(blo, bhi):
2025-07-01 05:46:41.939 bj = b[j]
2025-07-01 05:46:41.946 cruncher.set_seq2(bj)
2025-07-01 05:46:41.954 for i in range(alo, ahi):
2025-07-01 05:46:41.961 ai = a[i]
2025-07-01 05:46:41.970 if ai == bj:
2025-07-01 05:46:41.982 if eqi is None:
2025-07-01 05:46:41.993 eqi, eqj = i, j
2025-07-01 05:46:42.004 continue
2025-07-01 05:46:42.015 cruncher.set_seq1(ai)
2025-07-01 05:46:42.027 # computing similarity is expensive, so use the quick
2025-07-01 05:46:42.038 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:42.047 # compares by a factor of 3.
2025-07-01 05:46:42.056 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:42.062 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:42.067 # of the computation is cached by cruncher
2025-07-01 05:46:42.078 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:42.087 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:42.093 cruncher.ratio() > best_ratio:
2025-07-01 05:46:42.100 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:42.112 if best_ratio < cutoff:
2025-07-01 05:46:42.126 # no non-identical "pretty close" pair
2025-07-01 05:46:42.133 if eqi is None:
2025-07-01 05:46:42.139 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:42.144 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:42.150 return
2025-07-01 05:46:42.156 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:42.165 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:42.174 else:
2025-07-01 05:46:42.181 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:42.188 eqi = None
2025-07-01 05:46:42.193
2025-07-01 05:46:42.199 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:42.206 # identical
2025-07-01 05:46:42.217
2025-07-01 05:46:42.225 # pump out diffs from before the synch point
2025-07-01 05:46:42.232 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:42.238
2025-07-01 05:46:42.248 # do intraline marking on the synch pair
2025-07-01 05:46:42.258 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:42.266 if eqi is None:
2025-07-01 05:46:42.275 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:42.287 atags = btags = ""
2025-07-01 05:46:42.297 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:42.304 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:42.314 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:42.327 if tag == 'replace':
2025-07-01 05:46:42.337 atags += '^' * la
2025-07-01 05:46:42.346 btags += '^' * lb
2025-07-01 05:46:42.353 elif tag == 'delete':
2025-07-01 05:46:42.360 atags += '-' * la
2025-07-01 05:46:42.371 elif tag == 'insert':
2025-07-01 05:46:42.382 btags += '+' * lb
2025-07-01 05:46:42.391 elif tag == 'equal':
2025-07-01 05:46:42.398 atags += ' ' * la
2025-07-01 05:46:42.404 btags += ' ' * lb
2025-07-01 05:46:42.409 else:
2025-07-01 05:46:42.415 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:42.423 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:42.434 else:
2025-07-01 05:46:42.441 # the synch pair is identical
2025-07-01 05:46:42.448 yield ' ' + aelt
2025-07-01 05:46:42.454
2025-07-01 05:46:42.461 # pump out diffs from after the synch point
2025-07-01 05:46:42.468 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:42.480
2025-07-01 05:46:42.490 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:42.501 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:42.510
2025-07-01 05:46:42.516 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:42.523 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:42.529 alo = 181, ahi = 1101
2025-07-01 05:46:42.537 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:42.543 blo = 181, bhi = 1101
2025-07-01 05:46:42.550
2025-07-01 05:46:42.561 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:42.570 g = []
2025-07-01 05:46:42.578 if alo < ahi:
2025-07-01 05:46:42.589 if blo < bhi:
2025-07-01 05:46:42.600 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:42.609 else:
2025-07-01 05:46:42.617 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:42.624 elif blo < bhi:
2025-07-01 05:46:42.630 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:42.636
2025-07-01 05:46:42.643 > yield from g
2025-07-01 05:46:42.652
2025-07-01 05:46:42.664 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:42.673 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:42.685
2025-07-01 05:46:42.694 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:42.702 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:42.708 alo = 181, ahi = 1101
2025-07-01 05:46:42.715 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:42.722 blo = 181, bhi = 1101
2025-07-01 05:46:42.729
2025-07-01 05:46:42.735 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:42.741 r"""
2025-07-01 05:46:42.751 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:42.764 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:42.774 synch point, and intraline difference marking is done on the
2025-07-01 05:46:42.781 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:42.787
2025-07-01 05:46:42.793 Example:
2025-07-01 05:46:42.799
2025-07-01 05:46:42.805 >>> d = Differ()
2025-07-01 05:46:42.809 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:42.814 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:42.818 >>> print(''.join(results), end="")
2025-07-01 05:46:42.823 - abcDefghiJkl
2025-07-01 05:46:42.833 + abcdefGhijkl
2025-07-01 05:46:42.847 """
2025-07-01 05:46:42.855
2025-07-01 05:46:42.865 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:42.873 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:42.880 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:42.886 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:42.892 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:42.898
2025-07-01 05:46:42.907 # search for the pair that matches best without being identical
2025-07-01 05:46:42.918 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:42.925 # on junk -- unless we have to)
2025-07-01 05:46:42.932 for j in range(blo, bhi):
2025-07-01 05:46:42.937 bj = b[j]
2025-07-01 05:46:42.943 cruncher.set_seq2(bj)
2025-07-01 05:46:42.952 for i in range(alo, ahi):
2025-07-01 05:46:42.962 ai = a[i]
2025-07-01 05:46:42.969 if ai == bj:
2025-07-01 05:46:42.978 if eqi is None:
2025-07-01 05:46:42.984 eqi, eqj = i, j
2025-07-01 05:46:42.990 continue
2025-07-01 05:46:43.000 cruncher.set_seq1(ai)
2025-07-01 05:46:43.010 # computing similarity is expensive, so use the quick
2025-07-01 05:46:43.018 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:43.029 # compares by a factor of 3.
2025-07-01 05:46:43.036 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:43.042 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:43.049 # of the computation is cached by cruncher
2025-07-01 05:46:43.056 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:43.063 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:43.069 cruncher.ratio() > best_ratio:
2025-07-01 05:46:43.075 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:43.083 if best_ratio < cutoff:
2025-07-01 05:46:43.091 # no non-identical "pretty close" pair
2025-07-01 05:46:43.095 if eqi is None:
2025-07-01 05:46:43.100 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:43.105 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:43.110 return
2025-07-01 05:46:43.115 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:43.119 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:43.124 else:
2025-07-01 05:46:43.129 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:43.134 eqi = None
2025-07-01 05:46:43.139
2025-07-01 05:46:43.145 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:43.150 # identical
2025-07-01 05:46:43.156
2025-07-01 05:46:43.161 # pump out diffs from before the synch point
2025-07-01 05:46:43.166 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:43.170
2025-07-01 05:46:43.176 # do intraline marking on the synch pair
2025-07-01 05:46:43.181 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:43.187 if eqi is None:
2025-07-01 05:46:43.193 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:43.199 atags = btags = ""
2025-07-01 05:46:43.206 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:43.215 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:43.222 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:43.230 if tag == 'replace':
2025-07-01 05:46:43.238 atags += '^' * la
2025-07-01 05:46:43.244 btags += '^' * lb
2025-07-01 05:46:43.251 elif tag == 'delete':
2025-07-01 05:46:43.259 atags += '-' * la
2025-07-01 05:46:43.271 elif tag == 'insert':
2025-07-01 05:46:43.281 btags += '+' * lb
2025-07-01 05:46:43.288 elif tag == 'equal':
2025-07-01 05:46:43.295 atags += ' ' * la
2025-07-01 05:46:43.300 btags += ' ' * lb
2025-07-01 05:46:43.305 else:
2025-07-01 05:46:43.310 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:43.315 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:43.320 else:
2025-07-01 05:46:43.325 # the synch pair is identical
2025-07-01 05:46:43.330 yield ' ' + aelt
2025-07-01 05:46:43.335
2025-07-01 05:46:43.341 # pump out diffs from after the synch point
2025-07-01 05:46:43.347 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:43.353
2025-07-01 05:46:43.359 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:43.366 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:43.376
2025-07-01 05:46:43.385 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:43.392 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:43.398 alo = 182, ahi = 1101
2025-07-01 05:46:43.404 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:43.410 blo = 182, bhi = 1101
2025-07-01 05:46:43.417
2025-07-01 05:46:43.423 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:43.431 g = []
2025-07-01 05:46:43.441 if alo < ahi:
2025-07-01 05:46:43.450 if blo < bhi:
2025-07-01 05:46:43.461 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:43.468 else:
2025-07-01 05:46:43.474 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:43.480 elif blo < bhi:
2025-07-01 05:46:43.486 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:43.495
2025-07-01 05:46:43.503 > yield from g
2025-07-01 05:46:43.509
2025-07-01 05:46:43.515 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:43.522 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:43.529
2025-07-01 05:46:43.534 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:43.541 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:43.546 alo = 182, ahi = 1101
2025-07-01 05:46:43.552 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:43.559 blo = 182, bhi = 1101
2025-07-01 05:46:43.570
2025-07-01 05:46:43.580 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:43.587 r"""
2025-07-01 05:46:43.595 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:43.601 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:43.615 synch point, and intraline difference marking is done on the
2025-07-01 05:46:43.623 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:43.629
2025-07-01 05:46:43.637 Example:
2025-07-01 05:46:43.643
2025-07-01 05:46:43.648 >>> d = Differ()
2025-07-01 05:46:43.654 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:43.660 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:43.665 >>> print(''.join(results), end="")
2025-07-01 05:46:43.671 - abcDefghiJkl
2025-07-01 05:46:43.684 + abcdefGhijkl
2025-07-01 05:46:43.697 """
2025-07-01 05:46:43.703
2025-07-01 05:46:43.711 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:43.720 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:43.730 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:43.738 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:43.745 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:43.750
2025-07-01 05:46:43.755 # search for the pair that matches best without being identical
2025-07-01 05:46:43.760 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:43.764 # on junk -- unless we have to)
2025-07-01 05:46:43.769 for j in range(blo, bhi):
2025-07-01 05:46:43.775 bj = b[j]
2025-07-01 05:46:43.781 cruncher.set_seq2(bj)
2025-07-01 05:46:43.786 for i in range(alo, ahi):
2025-07-01 05:46:43.792 ai = a[i]
2025-07-01 05:46:43.799 if ai == bj:
2025-07-01 05:46:43.806 if eqi is None:
2025-07-01 05:46:43.812 eqi, eqj = i, j
2025-07-01 05:46:43.819 continue
2025-07-01 05:46:43.826 cruncher.set_seq1(ai)
2025-07-01 05:46:43.835 # computing similarity is expensive, so use the quick
2025-07-01 05:46:43.845 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:43.857 # compares by a factor of 3.
2025-07-01 05:46:43.865 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:43.874 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:43.880 # of the computation is cached by cruncher
2025-07-01 05:46:43.887 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:43.894 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:43.901 cruncher.ratio() > best_ratio:
2025-07-01 05:46:43.912 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:43.922 if best_ratio < cutoff:
2025-07-01 05:46:43.929 # no non-identical "pretty close" pair
2025-07-01 05:46:43.936 if eqi is None:
2025-07-01 05:46:43.942 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:43.953 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:43.964 return
2025-07-01 05:46:43.972 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:43.979 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:43.985 else:
2025-07-01 05:46:43.991 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:43.997 eqi = None
2025-07-01 05:46:44.003
2025-07-01 05:46:44.010 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:44.018 # identical
2025-07-01 05:46:44.027
2025-07-01 05:46:44.033 # pump out diffs from before the synch point
2025-07-01 05:46:44.039 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:44.045
2025-07-01 05:46:44.052 # do intraline marking on the synch pair
2025-07-01 05:46:44.059 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:44.070 if eqi is None:
2025-07-01 05:46:44.077 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:44.084 atags = btags = ""
2025-07-01 05:46:44.090 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:44.099 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:44.107 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:44.118 if tag == 'replace':
2025-07-01 05:46:44.126 atags += '^' * la
2025-07-01 05:46:44.133 btags += '^' * lb
2025-07-01 05:46:44.139 elif tag == 'delete':
2025-07-01 05:46:44.145 atags += '-' * la
2025-07-01 05:46:44.151 elif tag == 'insert':
2025-07-01 05:46:44.157 btags += '+' * lb
2025-07-01 05:46:44.163 elif tag == 'equal':
2025-07-01 05:46:44.169 atags += ' ' * la
2025-07-01 05:46:44.175 btags += ' ' * lb
2025-07-01 05:46:44.182 else:
2025-07-01 05:46:44.189 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:44.196 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:44.202 else:
2025-07-01 05:46:44.214 # the synch pair is identical
2025-07-01 05:46:44.222 yield ' ' + aelt
2025-07-01 05:46:44.229
2025-07-01 05:46:44.235 # pump out diffs from after the synch point
2025-07-01 05:46:44.241 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:44.247
2025-07-01 05:46:44.252 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:44.258 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:44.263
2025-07-01 05:46:44.270 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:44.281 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:44.288 alo = 183, ahi = 1101
2025-07-01 05:46:44.296 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:44.301 blo = 183, bhi = 1101
2025-07-01 05:46:44.307
2025-07-01 05:46:44.312 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:44.318 g = []
2025-07-01 05:46:44.329 if alo < ahi:
2025-07-01 05:46:44.337 if blo < bhi:
2025-07-01 05:46:44.344 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:44.350 else:
2025-07-01 05:46:44.355 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:44.360 elif blo < bhi:
2025-07-01 05:46:44.365 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:44.371
2025-07-01 05:46:44.378 > yield from g
2025-07-01 05:46:44.384
2025-07-01 05:46:44.391 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:44.398 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:44.408
2025-07-01 05:46:44.417 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:44.425 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:44.431 alo = 183, ahi = 1101
2025-07-01 05:46:44.437 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:44.442 blo = 183, bhi = 1101
2025-07-01 05:46:44.446
2025-07-01 05:46:44.451 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:44.456 r"""
2025-07-01 05:46:44.462 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:44.468 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:44.474 synch point, and intraline difference marking is done on the
2025-07-01 05:46:44.483 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:44.495
2025-07-01 05:46:44.502 Example:
2025-07-01 05:46:44.509
2025-07-01 05:46:44.514 >>> d = Differ()
2025-07-01 05:46:44.520 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:44.527 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:44.534 >>> print(''.join(results), end="")
2025-07-01 05:46:44.540 - abcDefghiJkl
2025-07-01 05:46:44.554 + abcdefGhijkl
2025-07-01 05:46:44.567 """
2025-07-01 05:46:44.575
2025-07-01 05:46:44.586 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:44.595 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:44.601 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:44.607 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:44.613 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:44.619
2025-07-01 05:46:44.626 # search for the pair that matches best without being identical
2025-07-01 05:46:44.638 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:44.649 # on junk -- unless we have to)
2025-07-01 05:46:44.657 for j in range(blo, bhi):
2025-07-01 05:46:44.663 bj = b[j]
2025-07-01 05:46:44.669 cruncher.set_seq2(bj)
2025-07-01 05:46:44.675 for i in range(alo, ahi):
2025-07-01 05:46:44.680 ai = a[i]
2025-07-01 05:46:44.686 if ai == bj:
2025-07-01 05:46:44.692 if eqi is None:
2025-07-01 05:46:44.699 eqi, eqj = i, j
2025-07-01 05:46:44.709 continue
2025-07-01 05:46:44.717 cruncher.set_seq1(ai)
2025-07-01 05:46:44.723 # computing similarity is expensive, so use the quick
2025-07-01 05:46:44.730 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:44.741 # compares by a factor of 3.
2025-07-01 05:46:44.752 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:44.763 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:44.775 # of the computation is cached by cruncher
2025-07-01 05:46:44.784 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:44.791 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:44.797 cruncher.ratio() > best_ratio:
2025-07-01 05:46:44.802 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:44.807 if best_ratio < cutoff:
2025-07-01 05:46:44.812 # no non-identical "pretty close" pair
2025-07-01 05:46:44.817 if eqi is None:
2025-07-01 05:46:44.823 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:44.829 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:44.834 return
2025-07-01 05:46:44.840 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:44.846 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:44.851 else:
2025-07-01 05:46:44.859 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:44.870 eqi = None
2025-07-01 05:46:44.878
2025-07-01 05:46:44.885 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:44.891 # identical
2025-07-01 05:46:44.896
2025-07-01 05:46:44.902 # pump out diffs from before the synch point
2025-07-01 05:46:44.907 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:44.913
2025-07-01 05:46:44.922 # do intraline marking on the synch pair
2025-07-01 05:46:44.933 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:44.944 if eqi is None:
2025-07-01 05:46:44.954 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:44.963 atags = btags = ""
2025-07-01 05:46:44.971 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:44.979 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:44.985 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:44.991 if tag == 'replace':
2025-07-01 05:46:44.998 atags += '^' * la
2025-07-01 05:46:45.011 btags += '^' * lb
2025-07-01 05:46:45.020 elif tag == 'delete':
2025-07-01 05:46:45.030 atags += '-' * la
2025-07-01 05:46:45.036 elif tag == 'insert':
2025-07-01 05:46:45.043 btags += '+' * lb
2025-07-01 05:46:45.055 elif tag == 'equal':
2025-07-01 05:46:45.068 atags += ' ' * la
2025-07-01 05:46:45.079 btags += ' ' * lb
2025-07-01 05:46:45.087 else:
2025-07-01 05:46:45.095 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:45.101 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:45.107 else:
2025-07-01 05:46:45.112 # the synch pair is identical
2025-07-01 05:46:45.118 yield ' ' + aelt
2025-07-01 05:46:45.123
2025-07-01 05:46:45.137 # pump out diffs from after the synch point
2025-07-01 05:46:45.147 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:45.154
2025-07-01 05:46:45.166 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:45.179 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:45.190
2025-07-01 05:46:45.200 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:45.212 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:45.221 alo = 184, ahi = 1101
2025-07-01 05:46:45.230 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:45.243 blo = 184, bhi = 1101
2025-07-01 05:46:45.255
2025-07-01 05:46:45.265 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:45.278 g = []
2025-07-01 05:46:45.288 if alo < ahi:
2025-07-01 05:46:45.296 if blo < bhi:
2025-07-01 05:46:45.304 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:45.310 else:
2025-07-01 05:46:45.320 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:45.330 elif blo < bhi:
2025-07-01 05:46:45.338 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:45.345
2025-07-01 05:46:45.350 > yield from g
2025-07-01 05:46:45.358
2025-07-01 05:46:45.372 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:45.382 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:45.390
2025-07-01 05:46:45.395 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:45.402 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:45.409 alo = 184, ahi = 1101
2025-07-01 05:46:45.421 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:45.430 blo = 184, bhi = 1101
2025-07-01 05:46:45.437
2025-07-01 05:46:45.443 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:45.450 r"""
2025-07-01 05:46:45.456 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:45.463 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:45.471 synch point, and intraline difference marking is done on the
2025-07-01 05:46:45.478 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:45.485
2025-07-01 05:46:45.491 Example:
2025-07-01 05:46:45.499
2025-07-01 05:46:45.510 >>> d = Differ()
2025-07-01 05:46:45.521 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:45.530 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:45.537 >>> print(''.join(results), end="")
2025-07-01 05:46:45.546 - abcDefghiJkl
2025-07-01 05:46:45.568 + abcdefGhijkl
2025-07-01 05:46:45.581 """
2025-07-01 05:46:45.588
2025-07-01 05:46:45.594 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:45.599 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:45.605 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:45.610 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:45.617 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:45.624
2025-07-01 05:46:45.631 # search for the pair that matches best without being identical
2025-07-01 05:46:45.639 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:45.650 # on junk -- unless we have to)
2025-07-01 05:46:45.662 for j in range(blo, bhi):
2025-07-01 05:46:45.670 bj = b[j]
2025-07-01 05:46:45.678 cruncher.set_seq2(bj)
2025-07-01 05:46:45.687 for i in range(alo, ahi):
2025-07-01 05:46:45.693 ai = a[i]
2025-07-01 05:46:45.698 if ai == bj:
2025-07-01 05:46:45.704 if eqi is None:
2025-07-01 05:46:45.710 eqi, eqj = i, j
2025-07-01 05:46:45.716 continue
2025-07-01 05:46:45.723 cruncher.set_seq1(ai)
2025-07-01 05:46:45.730 # computing similarity is expensive, so use the quick
2025-07-01 05:46:45.735 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:45.740 # compares by a factor of 3.
2025-07-01 05:46:45.746 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:45.756 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:45.765 # of the computation is cached by cruncher
2025-07-01 05:46:45.772 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:45.779 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:45.786 cruncher.ratio() > best_ratio:
2025-07-01 05:46:45.795 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:45.805 if best_ratio < cutoff:
2025-07-01 05:46:45.812 # no non-identical "pretty close" pair
2025-07-01 05:46:45.819 if eqi is None:
2025-07-01 05:46:45.826 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:45.836 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:45.844 return
2025-07-01 05:46:45.852 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:45.858 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:45.868 else:
2025-07-01 05:46:45.878 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:45.885 eqi = None
2025-07-01 05:46:45.892
2025-07-01 05:46:45.897 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:45.904 # identical
2025-07-01 05:46:45.910
2025-07-01 05:46:45.921 # pump out diffs from before the synch point
2025-07-01 05:46:45.929 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:45.935
2025-07-01 05:46:45.941 # do intraline marking on the synch pair
2025-07-01 05:46:45.950 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:45.962 if eqi is None:
2025-07-01 05:46:45.973 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:45.980 atags = btags = ""
2025-07-01 05:46:45.986 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:45.993 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:46.001 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:46.014 if tag == 'replace':
2025-07-01 05:46:46.023 atags += '^' * la
2025-07-01 05:46:46.032 btags += '^' * lb
2025-07-01 05:46:46.039 elif tag == 'delete':
2025-07-01 05:46:46.047 atags += '-' * la
2025-07-01 05:46:46.058 elif tag == 'insert':
2025-07-01 05:46:46.066 btags += '+' * lb
2025-07-01 05:46:46.075 elif tag == 'equal':
2025-07-01 05:46:46.086 atags += ' ' * la
2025-07-01 05:46:46.095 btags += ' ' * lb
2025-07-01 05:46:46.103 else:
2025-07-01 05:46:46.111 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:46.118 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:46.126 else:
2025-07-01 05:46:46.134 # the synch pair is identical
2025-07-01 05:46:46.140 yield ' ' + aelt
2025-07-01 05:46:46.145
2025-07-01 05:46:46.150 # pump out diffs from after the synch point
2025-07-01 05:46:46.155 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:46.162
2025-07-01 05:46:46.174 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:46.181 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:46.186
2025-07-01 05:46:46.197 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:46.204 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:46.211 alo = 185, ahi = 1101
2025-07-01 05:46:46.221 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:46.229 blo = 185, bhi = 1101
2025-07-01 05:46:46.235
2025-07-01 05:46:46.242 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:46.248 g = []
2025-07-01 05:46:46.255 if alo < ahi:
2025-07-01 05:46:46.261 if blo < bhi:
2025-07-01 05:46:46.268 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:46.275 else:
2025-07-01 05:46:46.283 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:46.294 elif blo < bhi:
2025-07-01 05:46:46.302 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:46.309
2025-07-01 05:46:46.315 > yield from g
2025-07-01 05:46:46.319
2025-07-01 05:46:46.324 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:46.329 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:46.337
2025-07-01 05:46:46.342 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:46.348 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:46.354 alo = 185, ahi = 1101
2025-07-01 05:46:46.361 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:46.367 blo = 185, bhi = 1101
2025-07-01 05:46:46.373
2025-07-01 05:46:46.379 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:46.393 r"""
2025-07-01 05:46:46.401 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:46.409 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:46.414 synch point, and intraline difference marking is done on the
2025-07-01 05:46:46.419 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:46.424
2025-07-01 05:46:46.429 Example:
2025-07-01 05:46:46.433
2025-07-01 05:46:46.445 >>> d = Differ()
2025-07-01 05:46:46.454 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:46.464 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:46.475 >>> print(''.join(results), end="")
2025-07-01 05:46:46.484 - abcDefghiJkl
2025-07-01 05:46:46.499 + abcdefGhijkl
2025-07-01 05:46:46.518 """
2025-07-01 05:46:46.527
2025-07-01 05:46:46.539 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:46.549 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:46.557 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:46.566 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:46.574 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:46.581
2025-07-01 05:46:46.587 # search for the pair that matches best without being identical
2025-07-01 05:46:46.594 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:46.601 # on junk -- unless we have to)
2025-07-01 05:46:46.608 for j in range(blo, bhi):
2025-07-01 05:46:46.615 bj = b[j]
2025-07-01 05:46:46.622 cruncher.set_seq2(bj)
2025-07-01 05:46:46.631 for i in range(alo, ahi):
2025-07-01 05:46:46.639 ai = a[i]
2025-07-01 05:46:46.650 if ai == bj:
2025-07-01 05:46:46.657 if eqi is None:
2025-07-01 05:46:46.663 eqi, eqj = i, j
2025-07-01 05:46:46.671 continue
2025-07-01 05:46:46.681 cruncher.set_seq1(ai)
2025-07-01 05:46:46.690 # computing similarity is expensive, so use the quick
2025-07-01 05:46:46.696 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:46.702 # compares by a factor of 3.
2025-07-01 05:46:46.709 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:46.720 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:46.731 # of the computation is cached by cruncher
2025-07-01 05:46:46.740 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:46.747 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:46.753 cruncher.ratio() > best_ratio:
2025-07-01 05:46:46.759 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:46.764 if best_ratio < cutoff:
2025-07-01 05:46:46.770 # no non-identical "pretty close" pair
2025-07-01 05:46:46.776 if eqi is None:
2025-07-01 05:46:46.783 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:46.790 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:46.797 return
2025-07-01 05:46:46.804 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:46.811 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:46.818 else:
2025-07-01 05:46:46.827 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:46.838 eqi = None
2025-07-01 05:46:46.846
2025-07-01 05:46:46.853 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:46.860 # identical
2025-07-01 05:46:46.867
2025-07-01 05:46:46.873 # pump out diffs from before the synch point
2025-07-01 05:46:46.882 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:46.888
2025-07-01 05:46:46.894 # do intraline marking on the synch pair
2025-07-01 05:46:46.900 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:46.906 if eqi is None:
2025-07-01 05:46:46.911 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:46.917 atags = btags = ""
2025-07-01 05:46:46.925 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:46.933 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:46.940 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:46.947 if tag == 'replace':
2025-07-01 05:46:46.954 atags += '^' * la
2025-07-01 05:46:46.962 btags += '^' * lb
2025-07-01 05:46:46.969 elif tag == 'delete':
2025-07-01 05:46:46.976 atags += '-' * la
2025-07-01 05:46:46.983 elif tag == 'insert':
2025-07-01 05:46:46.991 btags += '+' * lb
2025-07-01 05:46:47.004 elif tag == 'equal':
2025-07-01 05:46:47.013 atags += ' ' * la
2025-07-01 05:46:47.020 btags += ' ' * lb
2025-07-01 05:46:47.027 else:
2025-07-01 05:46:47.034 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:47.040 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:47.046 else:
2025-07-01 05:46:47.056 # the synch pair is identical
2025-07-01 05:46:47.068 yield ' ' + aelt
2025-07-01 05:46:47.078
2025-07-01 05:46:47.087 # pump out diffs from after the synch point
2025-07-01 05:46:47.095 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:47.101
2025-07-01 05:46:47.107 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:47.113 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:47.119
2025-07-01 05:46:47.127 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:47.139 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:47.149 alo = 186, ahi = 1101
2025-07-01 05:46:47.159 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:47.166 blo = 186, bhi = 1101
2025-07-01 05:46:47.172
2025-07-01 05:46:47.179 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:47.185 g = []
2025-07-01 05:46:47.190 if alo < ahi:
2025-07-01 05:46:47.197 if blo < bhi:
2025-07-01 05:46:47.203 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:47.216 else:
2025-07-01 05:46:47.227 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:47.234 elif blo < bhi:
2025-07-01 05:46:47.243 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:47.252
2025-07-01 05:46:47.260 > yield from g
2025-07-01 05:46:47.267
2025-07-01 05:46:47.272 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:47.280 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:47.290
2025-07-01 05:46:47.297 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:47.306 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:47.316 alo = 186, ahi = 1101
2025-07-01 05:46:47.325 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:47.332 blo = 186, bhi = 1101
2025-07-01 05:46:47.339
2025-07-01 05:46:47.347 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:47.358 r"""
2025-07-01 05:46:47.366 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:47.374 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:47.381 synch point, and intraline difference marking is done on the
2025-07-01 05:46:47.388 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:47.399
2025-07-01 05:46:47.406 Example:
2025-07-01 05:46:47.412
2025-07-01 05:46:47.424 >>> d = Differ()
2025-07-01 05:46:47.433 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:47.441 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:47.446 >>> print(''.join(results), end="")
2025-07-01 05:46:47.452 - abcDefghiJkl
2025-07-01 05:46:47.462 + abcdefGhijkl
2025-07-01 05:46:47.473 """
2025-07-01 05:46:47.479
2025-07-01 05:46:47.486 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:47.493 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:47.500 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:47.506 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:47.512 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:47.519
2025-07-01 05:46:47.530 # search for the pair that matches best without being identical
2025-07-01 05:46:47.538 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:47.549 # on junk -- unless we have to)
2025-07-01 05:46:47.556 for j in range(blo, bhi):
2025-07-01 05:46:47.562 bj = b[j]
2025-07-01 05:46:47.567 cruncher.set_seq2(bj)
2025-07-01 05:46:47.575 for i in range(alo, ahi):
2025-07-01 05:46:47.588 ai = a[i]
2025-07-01 05:46:47.597 if ai == bj:
2025-07-01 05:46:47.604 if eqi is None:
2025-07-01 05:46:47.610 eqi, eqj = i, j
2025-07-01 05:46:47.616 continue
2025-07-01 05:46:47.622 cruncher.set_seq1(ai)
2025-07-01 05:46:47.628 # computing similarity is expensive, so use the quick
2025-07-01 05:46:47.637 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:47.651 # compares by a factor of 3.
2025-07-01 05:46:47.661 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:47.670 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:47.682 # of the computation is cached by cruncher
2025-07-01 05:46:47.689 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:47.696 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:47.702 cruncher.ratio() > best_ratio:
2025-07-01 05:46:47.710 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:47.721 if best_ratio < cutoff:
2025-07-01 05:46:47.731 # no non-identical "pretty close" pair
2025-07-01 05:46:47.743 if eqi is None:
2025-07-01 05:46:47.750 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:47.759 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:47.769 return
2025-07-01 05:46:47.781 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:47.791 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:47.799 else:
2025-07-01 05:46:47.807 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:47.817 eqi = None
2025-07-01 05:46:47.826
2025-07-01 05:46:47.837 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:47.847 # identical
2025-07-01 05:46:47.856
2025-07-01 05:46:47.864 # pump out diffs from before the synch point
2025-07-01 05:46:47.871 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:47.879
2025-07-01 05:46:47.890 # do intraline marking on the synch pair
2025-07-01 05:46:47.902 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:47.912 if eqi is None:
2025-07-01 05:46:47.921 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:47.928 atags = btags = ""
2025-07-01 05:46:47.934 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:47.940 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:47.946 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:47.952 if tag == 'replace':
2025-07-01 05:46:47.958 atags += '^' * la
2025-07-01 05:46:47.963 btags += '^' * lb
2025-07-01 05:46:47.969 elif tag == 'delete':
2025-07-01 05:46:47.974 atags += '-' * la
2025-07-01 05:46:47.981 elif tag == 'insert':
2025-07-01 05:46:47.991 btags += '+' * lb
2025-07-01 05:46:48.002 elif tag == 'equal':
2025-07-01 05:46:48.014 atags += ' ' * la
2025-07-01 05:46:48.025 btags += ' ' * lb
2025-07-01 05:46:48.037 else:
2025-07-01 05:46:48.049 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:48.059 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:48.067 else:
2025-07-01 05:46:48.073 # the synch pair is identical
2025-07-01 05:46:48.079 yield ' ' + aelt
2025-07-01 05:46:48.085
2025-07-01 05:46:48.091 # pump out diffs from after the synch point
2025-07-01 05:46:48.098 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:48.103
2025-07-01 05:46:48.111 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:48.122 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:48.131
2025-07-01 05:46:48.140 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:48.149 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:48.154 alo = 187, ahi = 1101
2025-07-01 05:46:48.163 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:48.170 blo = 187, bhi = 1101
2025-07-01 05:46:48.176
2025-07-01 05:46:48.188 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:48.201 g = []
2025-07-01 05:46:48.211 if alo < ahi:
2025-07-01 05:46:48.220 if blo < bhi:
2025-07-01 05:46:48.227 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:48.235 else:
2025-07-01 05:46:48.248 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:48.256 elif blo < bhi:
2025-07-01 05:46:48.267 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:48.277
2025-07-01 05:46:48.283 > yield from g
2025-07-01 05:46:48.293
2025-07-01 05:46:48.306 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:48.317 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:48.324
2025-07-01 05:46:48.330 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:48.337 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:48.343 alo = 187, ahi = 1101
2025-07-01 05:46:48.350 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:48.356 blo = 187, bhi = 1101
2025-07-01 05:46:48.365
2025-07-01 05:46:48.377 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:48.388 r"""
2025-07-01 05:46:48.400 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:48.409 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:48.419 synch point, and intraline difference marking is done on the
2025-07-01 05:46:48.426 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:48.435
2025-07-01 05:46:48.443 Example:
2025-07-01 05:46:48.449
2025-07-01 05:46:48.455 >>> d = Differ()
2025-07-01 05:46:48.460 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:48.464 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:48.470 >>> print(''.join(results), end="")
2025-07-01 05:46:48.477 - abcDefghiJkl
2025-07-01 05:46:48.490 + abcdefGhijkl
2025-07-01 05:46:48.509 """
2025-07-01 05:46:48.518
2025-07-01 05:46:48.526 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:48.535 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:48.541 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:48.553 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:48.563 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:48.571
2025-07-01 05:46:48.578 # search for the pair that matches best without being identical
2025-07-01 05:46:48.587 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:48.596 # on junk -- unless we have to)
2025-07-01 05:46:48.608 for j in range(blo, bhi):
2025-07-01 05:46:48.618 bj = b[j]
2025-07-01 05:46:48.626 cruncher.set_seq2(bj)
2025-07-01 05:46:48.634 for i in range(alo, ahi):
2025-07-01 05:46:48.640 ai = a[i]
2025-07-01 05:46:48.646 if ai == bj:
2025-07-01 05:46:48.652 if eqi is None:
2025-07-01 05:46:48.656 eqi, eqj = i, j
2025-07-01 05:46:48.661 continue
2025-07-01 05:46:48.665 cruncher.set_seq1(ai)
2025-07-01 05:46:48.670 # computing similarity is expensive, so use the quick
2025-07-01 05:46:48.675 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:48.679 # compares by a factor of 3.
2025-07-01 05:46:48.684 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:48.690 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:48.696 # of the computation is cached by cruncher
2025-07-01 05:46:48.701 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:48.707 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:48.713 cruncher.ratio() > best_ratio:
2025-07-01 05:46:48.719 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:48.726 if best_ratio < cutoff:
2025-07-01 05:46:48.737 # no non-identical "pretty close" pair
2025-07-01 05:46:48.745 if eqi is None:
2025-07-01 05:46:48.752 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:48.757 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:48.763 return
2025-07-01 05:46:48.768 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:48.774 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:48.780 else:
2025-07-01 05:46:48.792 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:48.803 eqi = None
2025-07-01 05:46:48.815
2025-07-01 05:46:48.823 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:48.830 # identical
2025-07-01 05:46:48.836
2025-07-01 05:46:48.842 # pump out diffs from before the synch point
2025-07-01 05:46:48.847 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:48.853
2025-07-01 05:46:48.859 # do intraline marking on the synch pair
2025-07-01 05:46:48.865 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:48.872 if eqi is None:
2025-07-01 05:46:48.879 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:48.887 atags = btags = ""
2025-07-01 05:46:48.898 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:48.905 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:48.911 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:48.917 if tag == 'replace':
2025-07-01 05:46:48.923 atags += '^' * la
2025-07-01 05:46:48.929 btags += '^' * lb
2025-07-01 05:46:48.936 elif tag == 'delete':
2025-07-01 05:46:48.943 atags += '-' * la
2025-07-01 05:46:48.954 elif tag == 'insert':
2025-07-01 05:46:48.963 btags += '+' * lb
2025-07-01 05:46:48.972 elif tag == 'equal':
2025-07-01 05:46:48.980 atags += ' ' * la
2025-07-01 05:46:48.989 btags += ' ' * lb
2025-07-01 05:46:48.996 else:
2025-07-01 05:46:49.003 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:49.010 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:49.020 else:
2025-07-01 05:46:49.029 # the synch pair is identical
2025-07-01 05:46:49.036 yield ' ' + aelt
2025-07-01 05:46:49.042
2025-07-01 05:46:49.053 # pump out diffs from after the synch point
2025-07-01 05:46:49.065 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:49.077
2025-07-01 05:46:49.087 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:49.095 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:49.102
2025-07-01 05:46:49.108 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:49.114 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:49.120 alo = 188, ahi = 1101
2025-07-01 05:46:49.128 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:49.135 blo = 188, bhi = 1101
2025-07-01 05:46:49.145
2025-07-01 05:46:49.155 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:49.170 g = []
2025-07-01 05:46:49.179 if alo < ahi:
2025-07-01 05:46:49.185 if blo < bhi:
2025-07-01 05:46:49.190 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:49.196 else:
2025-07-01 05:46:49.202 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:49.210 elif blo < bhi:
2025-07-01 05:46:49.218 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:49.224
2025-07-01 05:46:49.230 > yield from g
2025-07-01 05:46:49.236
2025-07-01 05:46:49.243 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:49.253 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:49.260
2025-07-01 05:46:49.267 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:49.276 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:49.287 alo = 188, ahi = 1101
2025-07-01 05:46:49.297 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:49.307 blo = 188, bhi = 1101
2025-07-01 05:46:49.318
2025-07-01 05:46:49.330 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:49.340 r"""
2025-07-01 05:46:49.347 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:49.355 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:49.367 synch point, and intraline difference marking is done on the
2025-07-01 05:46:49.377 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:49.385
2025-07-01 05:46:49.392 Example:
2025-07-01 05:46:49.399
2025-07-01 05:46:49.404 >>> d = Differ()
2025-07-01 05:46:49.410 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:49.415 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:49.421 >>> print(''.join(results), end="")
2025-07-01 05:46:49.426 - abcDefghiJkl
2025-07-01 05:46:49.448 + abcdefGhijkl
2025-07-01 05:46:49.466 """
2025-07-01 05:46:49.478
2025-07-01 05:46:49.488 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:49.500 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:49.512 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:49.522 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:49.531 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:49.538
2025-07-01 05:46:49.549 # search for the pair that matches best without being identical
2025-07-01 05:46:49.559 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:49.566 # on junk -- unless we have to)
2025-07-01 05:46:49.573 for j in range(blo, bhi):
2025-07-01 05:46:49.579 bj = b[j]
2025-07-01 05:46:49.585 cruncher.set_seq2(bj)
2025-07-01 05:46:49.590 for i in range(alo, ahi):
2025-07-01 05:46:49.601 ai = a[i]
2025-07-01 05:46:49.614 if ai == bj:
2025-07-01 05:46:49.624 if eqi is None:
2025-07-01 05:46:49.631 eqi, eqj = i, j
2025-07-01 05:46:49.637 continue
2025-07-01 05:46:49.643 cruncher.set_seq1(ai)
2025-07-01 05:46:49.651 # computing similarity is expensive, so use the quick
2025-07-01 05:46:49.663 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:49.674 # compares by a factor of 3.
2025-07-01 05:46:49.683 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:49.691 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:49.703 # of the computation is cached by cruncher
2025-07-01 05:46:49.714 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:49.723 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:49.730 cruncher.ratio() > best_ratio:
2025-07-01 05:46:49.743 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:49.755 if best_ratio < cutoff:
2025-07-01 05:46:49.768 # no non-identical "pretty close" pair
2025-07-01 05:46:49.777 if eqi is None:
2025-07-01 05:46:49.789 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:49.801 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:49.812 return
2025-07-01 05:46:49.823 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:49.832 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:49.839 else:
2025-07-01 05:46:49.847 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:49.855 eqi = None
2025-07-01 05:46:49.866
2025-07-01 05:46:49.879 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:49.888 # identical
2025-07-01 05:46:49.895
2025-07-01 05:46:49.903 # pump out diffs from before the synch point
2025-07-01 05:46:49.912 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:49.920
2025-07-01 05:46:49.927 # do intraline marking on the synch pair
2025-07-01 05:46:49.933 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:49.938 if eqi is None:
2025-07-01 05:46:49.944 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:49.950 atags = btags = ""
2025-07-01 05:46:49.960 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:49.970 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:49.978 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:49.985 if tag == 'replace':
2025-07-01 05:46:49.994 atags += '^' * la
2025-07-01 05:46:50.005 btags += '^' * lb
2025-07-01 05:46:50.017 elif tag == 'delete':
2025-07-01 05:46:50.026 atags += '-' * la
2025-07-01 05:46:50.038 elif tag == 'insert':
2025-07-01 05:46:50.049 btags += '+' * lb
2025-07-01 05:46:50.060 elif tag == 'equal':
2025-07-01 05:46:50.072 atags += ' ' * la
2025-07-01 05:46:50.082 btags += ' ' * lb
2025-07-01 05:46:50.095 else:
2025-07-01 05:46:50.108 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:50.117 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:50.129 else:
2025-07-01 05:46:50.138 # the synch pair is identical
2025-07-01 05:46:50.148 yield ' ' + aelt
2025-07-01 05:46:50.160
2025-07-01 05:46:50.169 # pump out diffs from after the synch point
2025-07-01 05:46:50.182 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:50.194
2025-07-01 05:46:50.203 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:50.209 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:50.215
2025-07-01 05:46:50.222 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:50.230 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:50.236 alo = 189, ahi = 1101
2025-07-01 05:46:50.249 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:50.258 blo = 189, bhi = 1101
2025-07-01 05:46:50.265
2025-07-01 05:46:50.271 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:50.278 g = []
2025-07-01 05:46:50.287 if alo < ahi:
2025-07-01 05:46:50.294 if blo < bhi:
2025-07-01 05:46:50.301 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:50.311 else:
2025-07-01 05:46:50.317 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:50.323 elif blo < bhi:
2025-07-01 05:46:50.327 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:50.332
2025-07-01 05:46:50.336 > yield from g
2025-07-01 05:46:50.341
2025-07-01 05:46:50.346 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:50.352 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:50.357
2025-07-01 05:46:50.363 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:50.369 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:50.374 alo = 189, ahi = 1101
2025-07-01 05:46:50.379 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:50.384 blo = 189, bhi = 1101
2025-07-01 05:46:50.388
2025-07-01 05:46:50.393 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:50.399 r"""
2025-07-01 05:46:50.404 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:50.414 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:50.424 synch point, and intraline difference marking is done on the
2025-07-01 05:46:50.434 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:50.440
2025-07-01 05:46:50.450 Example:
2025-07-01 05:46:50.458
2025-07-01 05:46:50.468 >>> d = Differ()
2025-07-01 05:46:50.479 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:50.487 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:50.493 >>> print(''.join(results), end="")
2025-07-01 05:46:50.499 - abcDefghiJkl
2025-07-01 05:46:50.516 + abcdefGhijkl
2025-07-01 05:46:50.529 """
2025-07-01 05:46:50.535
2025-07-01 05:46:50.542 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:50.553 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:50.563 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:50.570 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:50.576 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:50.582
2025-07-01 05:46:50.588 # search for the pair that matches best without being identical
2025-07-01 05:46:50.595 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:50.601 # on junk -- unless we have to)
2025-07-01 05:46:50.608 for j in range(blo, bhi):
2025-07-01 05:46:50.615 bj = b[j]
2025-07-01 05:46:50.626 cruncher.set_seq2(bj)
2025-07-01 05:46:50.638 for i in range(alo, ahi):
2025-07-01 05:46:50.648 ai = a[i]
2025-07-01 05:46:50.657 if ai == bj:
2025-07-01 05:46:50.666 if eqi is None:
2025-07-01 05:46:50.678 eqi, eqj = i, j
2025-07-01 05:46:50.688 continue
2025-07-01 05:46:50.699 cruncher.set_seq1(ai)
2025-07-01 05:46:50.709 # computing similarity is expensive, so use the quick
2025-07-01 05:46:50.716 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:50.722 # compares by a factor of 3.
2025-07-01 05:46:50.727 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:50.734 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:50.741 # of the computation is cached by cruncher
2025-07-01 05:46:50.752 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:50.761 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:50.769 cruncher.ratio() > best_ratio:
2025-07-01 05:46:50.778 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:50.786 if best_ratio < cutoff:
2025-07-01 05:46:50.794 # no non-identical "pretty close" pair
2025-07-01 05:46:50.805 if eqi is None:
2025-07-01 05:46:50.815 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:50.823 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:50.833 return
2025-07-01 05:46:50.845 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:50.854 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:50.866 else:
2025-07-01 05:46:50.879 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:50.890 eqi = None
2025-07-01 05:46:50.902
2025-07-01 05:46:50.916 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:50.928 # identical
2025-07-01 05:46:50.938
2025-07-01 05:46:50.946 # pump out diffs from before the synch point
2025-07-01 05:46:50.952 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:50.958
2025-07-01 05:46:50.967 # do intraline marking on the synch pair
2025-07-01 05:46:50.978 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:50.987 if eqi is None:
2025-07-01 05:46:50.994 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:51.000 atags = btags = ""
2025-07-01 05:46:51.007 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:51.015 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:51.022 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:51.030 if tag == 'replace':
2025-07-01 05:46:51.036 atags += '^' * la
2025-07-01 05:46:51.042 btags += '^' * lb
2025-07-01 05:46:51.047 elif tag == 'delete':
2025-07-01 05:46:51.051 atags += '-' * la
2025-07-01 05:46:51.056 elif tag == 'insert':
2025-07-01 05:46:51.060 btags += '+' * lb
2025-07-01 05:46:51.065 elif tag == 'equal':
2025-07-01 05:46:51.071 atags += ' ' * la
2025-07-01 05:46:51.076 btags += ' ' * lb
2025-07-01 05:46:51.082 else:
2025-07-01 05:46:51.088 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:51.095 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:51.105 else:
2025-07-01 05:46:51.114 # the synch pair is identical
2025-07-01 05:46:51.121 yield ' ' + aelt
2025-07-01 05:46:51.127
2025-07-01 05:46:51.132 # pump out diffs from after the synch point
2025-07-01 05:46:51.138 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:51.144
2025-07-01 05:46:51.151 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:51.161 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:51.168
2025-07-01 05:46:51.175 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:51.184 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:51.194 alo = 190, ahi = 1101
2025-07-01 05:46:51.208 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:51.219 blo = 190, bhi = 1101
2025-07-01 05:46:51.227
2025-07-01 05:46:51.234 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:51.240 g = []
2025-07-01 05:46:51.245 if alo < ahi:
2025-07-01 05:46:51.250 if blo < bhi:
2025-07-01 05:46:51.259 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:51.266 else:
2025-07-01 05:46:51.272 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:51.278 elif blo < bhi:
2025-07-01 05:46:51.285 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:51.290
2025-07-01 05:46:51.302 > yield from g
2025-07-01 05:46:51.311
2025-07-01 05:46:51.319 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:51.327 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:51.334
2025-07-01 05:46:51.341 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:51.348 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:51.353 alo = 190, ahi = 1101
2025-07-01 05:46:51.358 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:51.363 blo = 190, bhi = 1101
2025-07-01 05:46:51.370
2025-07-01 05:46:51.377 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:51.384 r"""
2025-07-01 05:46:51.390 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:51.396 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:51.401 synch point, and intraline difference marking is done on the
2025-07-01 05:46:51.406 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:51.412
2025-07-01 05:46:51.417 Example:
2025-07-01 05:46:51.423
2025-07-01 05:46:51.429 >>> d = Differ()
2025-07-01 05:46:51.436 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:51.442 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:51.448 >>> print(''.join(results), end="")
2025-07-01 05:46:51.453 - abcDefghiJkl
2025-07-01 05:46:51.463 + abcdefGhijkl
2025-07-01 05:46:51.475 """
2025-07-01 05:46:51.482
2025-07-01 05:46:51.489 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:51.496 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:51.501 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:51.506 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:51.510 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:51.515
2025-07-01 05:46:51.521 # search for the pair that matches best without being identical
2025-07-01 05:46:51.526 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:51.532 # on junk -- unless we have to)
2025-07-01 05:46:51.538 for j in range(blo, bhi):
2025-07-01 05:46:51.543 bj = b[j]
2025-07-01 05:46:51.548 cruncher.set_seq2(bj)
2025-07-01 05:46:51.553 for i in range(alo, ahi):
2025-07-01 05:46:51.558 ai = a[i]
2025-07-01 05:46:51.563 if ai == bj:
2025-07-01 05:46:51.569 if eqi is None:
2025-07-01 05:46:51.575 eqi, eqj = i, j
2025-07-01 05:46:51.582 continue
2025-07-01 05:46:51.590 cruncher.set_seq1(ai)
2025-07-01 05:46:51.601 # computing similarity is expensive, so use the quick
2025-07-01 05:46:51.615 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:51.625 # compares by a factor of 3.
2025-07-01 05:46:51.639 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:51.650 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:51.660 # of the computation is cached by cruncher
2025-07-01 05:46:51.672 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:51.685 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:51.697 cruncher.ratio() > best_ratio:
2025-07-01 05:46:51.708 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:51.716 if best_ratio < cutoff:
2025-07-01 05:46:51.724 # no non-identical "pretty close" pair
2025-07-01 05:46:51.730 if eqi is None:
2025-07-01 05:46:51.737 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:51.744 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:51.749 return
2025-07-01 05:46:51.755 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:51.761 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:51.772 else:
2025-07-01 05:46:51.784 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:51.793 eqi = None
2025-07-01 05:46:51.801
2025-07-01 05:46:51.807 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:51.812 # identical
2025-07-01 05:46:51.817
2025-07-01 05:46:51.825 # pump out diffs from before the synch point
2025-07-01 05:46:51.832 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:51.838
2025-07-01 05:46:51.844 # do intraline marking on the synch pair
2025-07-01 05:46:51.849 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:51.854 if eqi is None:
2025-07-01 05:46:51.864 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:51.874 atags = btags = ""
2025-07-01 05:46:51.881 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:51.888 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:51.894 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:51.904 if tag == 'replace':
2025-07-01 05:46:51.913 atags += '^' * la
2025-07-01 05:46:51.920 btags += '^' * lb
2025-07-01 05:46:51.927 elif tag == 'delete':
2025-07-01 05:46:51.935 atags += '-' * la
2025-07-01 05:46:51.945 elif tag == 'insert':
2025-07-01 05:46:51.953 btags += '+' * lb
2025-07-01 05:46:51.964 elif tag == 'equal':
2025-07-01 05:46:51.975 atags += ' ' * la
2025-07-01 05:46:51.985 btags += ' ' * lb
2025-07-01 05:46:51.993 else:
2025-07-01 05:46:52.004 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:52.014 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:52.021 else:
2025-07-01 05:46:52.028 # the synch pair is identical
2025-07-01 05:46:52.036 yield ' ' + aelt
2025-07-01 05:46:52.044
2025-07-01 05:46:52.056 # pump out diffs from after the synch point
2025-07-01 05:46:52.065 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:52.071
2025-07-01 05:46:52.077 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:52.090 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:52.100
2025-07-01 05:46:52.109 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:52.119 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:52.129 alo = 191, ahi = 1101
2025-07-01 05:46:52.139 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:52.148 blo = 191, bhi = 1101
2025-07-01 05:46:52.160
2025-07-01 05:46:52.173 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:52.181 g = []
2025-07-01 05:46:52.188 if alo < ahi:
2025-07-01 05:46:52.195 if blo < bhi:
2025-07-01 05:46:52.200 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:52.205 else:
2025-07-01 05:46:52.211 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:52.216 elif blo < bhi:
2025-07-01 05:46:52.220 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:52.225
2025-07-01 05:46:52.231 > yield from g
2025-07-01 05:46:52.237
2025-07-01 05:46:52.243 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:52.249 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:52.256
2025-07-01 05:46:52.264 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:52.277 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:52.287 alo = 191, ahi = 1101
2025-07-01 05:46:52.295 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:52.302 blo = 191, bhi = 1101
2025-07-01 05:46:52.309
2025-07-01 05:46:52.319 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:52.327 r"""
2025-07-01 05:46:52.340 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:52.351 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:52.360 synch point, and intraline difference marking is done on the
2025-07-01 05:46:52.371 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:52.377
2025-07-01 05:46:52.384 Example:
2025-07-01 05:46:52.391
2025-07-01 05:46:52.399 >>> d = Differ()
2025-07-01 05:46:52.410 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:52.422 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:52.430 >>> print(''.join(results), end="")
2025-07-01 05:46:52.437 - abcDefghiJkl
2025-07-01 05:46:52.452 + abcdefGhijkl
2025-07-01 05:46:52.471 """
2025-07-01 05:46:52.477
2025-07-01 05:46:52.482 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:52.488 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:52.495 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:52.501 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:52.508 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:52.515
2025-07-01 05:46:52.522 # search for the pair that matches best without being identical
2025-07-01 05:46:52.529 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:52.536 # on junk -- unless we have to)
2025-07-01 05:46:52.543 for j in range(blo, bhi):
2025-07-01 05:46:52.549 bj = b[j]
2025-07-01 05:46:52.561 cruncher.set_seq2(bj)
2025-07-01 05:46:52.572 for i in range(alo, ahi):
2025-07-01 05:46:52.579 ai = a[i]
2025-07-01 05:46:52.586 if ai == bj:
2025-07-01 05:46:52.592 if eqi is None:
2025-07-01 05:46:52.597 eqi, eqj = i, j
2025-07-01 05:46:52.606 continue
2025-07-01 05:46:52.616 cruncher.set_seq1(ai)
2025-07-01 05:46:52.625 # computing similarity is expensive, so use the quick
2025-07-01 05:46:52.633 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:52.639 # compares by a factor of 3.
2025-07-01 05:46:52.646 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:52.654 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:52.659 # of the computation is cached by cruncher
2025-07-01 05:46:52.665 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:52.671 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:52.676 cruncher.ratio() > best_ratio:
2025-07-01 05:46:52.682 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:52.694 if best_ratio < cutoff:
2025-07-01 05:46:52.703 # no non-identical "pretty close" pair
2025-07-01 05:46:52.710 if eqi is None:
2025-07-01 05:46:52.722 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:52.732 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:52.744 return
2025-07-01 05:46:52.752 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:52.759 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:52.765 else:
2025-07-01 05:46:52.771 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:52.777 eqi = None
2025-07-01 05:46:52.782
2025-07-01 05:46:52.789 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:52.795 # identical
2025-07-01 05:46:52.800
2025-07-01 05:46:52.806 # pump out diffs from before the synch point
2025-07-01 05:46:52.813 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:52.819
2025-07-01 05:46:52.828 # do intraline marking on the synch pair
2025-07-01 05:46:52.836 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:52.843 if eqi is None:
2025-07-01 05:46:52.851 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:52.861 atags = btags = ""
2025-07-01 05:46:52.869 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:52.875 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:52.880 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:52.884 if tag == 'replace':
2025-07-01 05:46:52.889 atags += '^' * la
2025-07-01 05:46:52.895 btags += '^' * lb
2025-07-01 05:46:52.901 elif tag == 'delete':
2025-07-01 05:46:52.909 atags += '-' * la
2025-07-01 05:46:52.918 elif tag == 'insert':
2025-07-01 05:46:52.926 btags += '+' * lb
2025-07-01 05:46:52.932 elif tag == 'equal':
2025-07-01 05:46:52.937 atags += ' ' * la
2025-07-01 05:46:52.943 btags += ' ' * lb
2025-07-01 05:46:52.949 else:
2025-07-01 05:46:52.955 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:52.961 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:52.966 else:
2025-07-01 05:46:52.975 # the synch pair is identical
2025-07-01 05:46:52.983 yield ' ' + aelt
2025-07-01 05:46:52.990
2025-07-01 05:46:52.998 # pump out diffs from after the synch point
2025-07-01 05:46:53.007 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:53.013
2025-07-01 05:46:53.019 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:53.024 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:53.031
2025-07-01 05:46:53.041 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:53.050 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:53.058 alo = 192, ahi = 1101
2025-07-01 05:46:53.069 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:53.077 blo = 192, bhi = 1101
2025-07-01 05:46:53.084
2025-07-01 05:46:53.091 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:53.097 g = []
2025-07-01 05:46:53.103 if alo < ahi:
2025-07-01 05:46:53.109 if blo < bhi:
2025-07-01 05:46:53.115 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:53.127 else:
2025-07-01 05:46:53.134 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:53.141 elif blo < bhi:
2025-07-01 05:46:53.147 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:53.155
2025-07-01 05:46:53.165 > yield from g
2025-07-01 05:46:53.173
2025-07-01 05:46:53.184 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:53.195 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:53.202
2025-07-01 05:46:53.211 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:53.222 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:53.231 alo = 192, ahi = 1101
2025-07-01 05:46:53.240 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:53.247 blo = 192, bhi = 1101
2025-07-01 05:46:53.254
2025-07-01 05:46:53.261 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:53.267 r"""
2025-07-01 05:46:53.273 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:53.279 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:53.285 synch point, and intraline difference marking is done on the
2025-07-01 05:46:53.291 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:53.296
2025-07-01 05:46:53.302 Example:
2025-07-01 05:46:53.307
2025-07-01 05:46:53.315 >>> d = Differ()
2025-07-01 05:46:53.325 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:53.334 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:53.342 >>> print(''.join(results), end="")
2025-07-01 05:46:53.350 - abcDefghiJkl
2025-07-01 05:46:53.366 + abcdefGhijkl
2025-07-01 05:46:53.390 """
2025-07-01 05:46:53.404
2025-07-01 05:46:53.414 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:53.424 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:53.435 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:53.443 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:53.453 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:53.461
2025-07-01 05:46:53.474 # search for the pair that matches best without being identical
2025-07-01 05:46:53.482 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:53.490 # on junk -- unless we have to)
2025-07-01 05:46:53.499 for j in range(blo, bhi):
2025-07-01 05:46:53.510 bj = b[j]
2025-07-01 05:46:53.522 cruncher.set_seq2(bj)
2025-07-01 05:46:53.535 for i in range(alo, ahi):
2025-07-01 05:46:53.546 ai = a[i]
2025-07-01 05:46:53.555 if ai == bj:
2025-07-01 05:46:53.561 if eqi is None:
2025-07-01 05:46:53.567 eqi, eqj = i, j
2025-07-01 05:46:53.572 continue
2025-07-01 05:46:53.578 cruncher.set_seq1(ai)
2025-07-01 05:46:53.583 # computing similarity is expensive, so use the quick
2025-07-01 05:46:53.590 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:53.596 # compares by a factor of 3.
2025-07-01 05:46:53.602 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:53.611 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:53.621 # of the computation is cached by cruncher
2025-07-01 05:46:53.629 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:53.638 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:53.648 cruncher.ratio() > best_ratio:
2025-07-01 05:46:53.657 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:53.664 if best_ratio < cutoff:
2025-07-01 05:46:53.670 # no non-identical "pretty close" pair
2025-07-01 05:46:53.681 if eqi is None:
2025-07-01 05:46:53.690 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:53.697 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:53.703 return
2025-07-01 05:46:53.709 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:53.714 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:53.720 else:
2025-07-01 05:46:53.726 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:53.731 eqi = None
2025-07-01 05:46:53.739
2025-07-01 05:46:53.749 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:53.759 # identical
2025-07-01 05:46:53.765
2025-07-01 05:46:53.772 # pump out diffs from before the synch point
2025-07-01 05:46:53.778 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:53.784
2025-07-01 05:46:53.791 # do intraline marking on the synch pair
2025-07-01 05:46:53.798 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:53.811 if eqi is None:
2025-07-01 05:46:53.820 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:53.829 atags = btags = ""
2025-07-01 05:46:53.837 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:53.848 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:53.858 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:53.868 if tag == 'replace':
2025-07-01 05:46:53.876 atags += '^' * la
2025-07-01 05:46:53.884 btags += '^' * lb
2025-07-01 05:46:53.891 elif tag == 'delete':
2025-07-01 05:46:53.897 atags += '-' * la
2025-07-01 05:46:53.902 elif tag == 'insert':
2025-07-01 05:46:53.912 btags += '+' * lb
2025-07-01 05:46:53.922 elif tag == 'equal':
2025-07-01 05:46:53.930 atags += ' ' * la
2025-07-01 05:46:53.939 btags += ' ' * lb
2025-07-01 05:46:53.951 else:
2025-07-01 05:46:53.962 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:53.974 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:53.984 else:
2025-07-01 05:46:53.994 # the synch pair is identical
2025-07-01 05:46:54.004 yield ' ' + aelt
2025-07-01 05:46:54.012
2025-07-01 05:46:54.020 # pump out diffs from after the synch point
2025-07-01 05:46:54.027 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:54.033
2025-07-01 05:46:54.038 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:54.045 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:54.050
2025-07-01 05:46:54.056 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:54.069 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:54.077 alo = 193, ahi = 1101
2025-07-01 05:46:54.084 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:54.090 blo = 193, bhi = 1101
2025-07-01 05:46:54.095
2025-07-01 05:46:54.101 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:54.106 g = []
2025-07-01 05:46:54.116 if alo < ahi:
2025-07-01 05:46:54.128 if blo < bhi:
2025-07-01 05:46:54.140 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:54.151 else:
2025-07-01 05:46:54.162 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:54.175 elif blo < bhi:
2025-07-01 05:46:54.181 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:54.188
2025-07-01 05:46:54.195 > yield from g
2025-07-01 05:46:54.206
2025-07-01 05:46:54.213 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:54.222 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:54.231
2025-07-01 05:46:54.239 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:54.245 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:54.251 alo = 193, ahi = 1101
2025-07-01 05:46:54.258 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:54.263 blo = 193, bhi = 1101
2025-07-01 05:46:54.269
2025-07-01 05:46:54.275 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:54.281 r"""
2025-07-01 05:46:54.287 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:54.294 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:54.301 synch point, and intraline difference marking is done on the
2025-07-01 05:46:54.314 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:54.323
2025-07-01 05:46:54.330 Example:
2025-07-01 05:46:54.336
2025-07-01 05:46:54.345 >>> d = Differ()
2025-07-01 05:46:54.356 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:54.365 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:54.373 >>> print(''.join(results), end="")
2025-07-01 05:46:54.380 - abcDefghiJkl
2025-07-01 05:46:54.394 + abcdefGhijkl
2025-07-01 05:46:54.416 """
2025-07-01 05:46:54.426
2025-07-01 05:46:54.433 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:54.439 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:54.446 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:54.453 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:54.460 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:54.467
2025-07-01 05:46:54.474 # search for the pair that matches best without being identical
2025-07-01 05:46:54.483 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:54.493 # on junk -- unless we have to)
2025-07-01 05:46:54.501 for j in range(blo, bhi):
2025-07-01 05:46:54.510 bj = b[j]
2025-07-01 05:46:54.521 cruncher.set_seq2(bj)
2025-07-01 05:46:54.531 for i in range(alo, ahi):
2025-07-01 05:46:54.539 ai = a[i]
2025-07-01 05:46:54.547 if ai == bj:
2025-07-01 05:46:54.557 if eqi is None:
2025-07-01 05:46:54.565 eqi, eqj = i, j
2025-07-01 05:46:54.571 continue
2025-07-01 05:46:54.577 cruncher.set_seq1(ai)
2025-07-01 05:46:54.583 # computing similarity is expensive, so use the quick
2025-07-01 05:46:54.588 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:54.594 # compares by a factor of 3.
2025-07-01 05:46:54.600 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:54.607 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:54.614 # of the computation is cached by cruncher
2025-07-01 05:46:54.625 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:54.634 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:54.641 cruncher.ratio() > best_ratio:
2025-07-01 05:46:54.648 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:54.653 if best_ratio < cutoff:
2025-07-01 05:46:54.658 # no non-identical "pretty close" pair
2025-07-01 05:46:54.663 if eqi is None:
2025-07-01 05:46:54.669 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:54.675 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:54.682 return
2025-07-01 05:46:54.689 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:54.696 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:54.701 else:
2025-07-01 05:46:54.707 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:54.712 eqi = None
2025-07-01 05:46:54.718
2025-07-01 05:46:54.726 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:54.733 # identical
2025-07-01 05:46:54.740
2025-07-01 05:46:54.746 # pump out diffs from before the synch point
2025-07-01 05:46:54.755 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:54.766
2025-07-01 05:46:54.775 # do intraline marking on the synch pair
2025-07-01 05:46:54.783 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:54.794 if eqi is None:
2025-07-01 05:46:54.805 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:54.814 atags = btags = ""
2025-07-01 05:46:54.825 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:54.834 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:54.842 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:54.849 if tag == 'replace':
2025-07-01 05:46:54.855 atags += '^' * la
2025-07-01 05:46:54.863 btags += '^' * lb
2025-07-01 05:46:54.873 elif tag == 'delete':
2025-07-01 05:46:54.881 atags += '-' * la
2025-07-01 05:46:54.888 elif tag == 'insert':
2025-07-01 05:46:54.895 btags += '+' * lb
2025-07-01 05:46:54.901 elif tag == 'equal':
2025-07-01 05:46:54.906 atags += ' ' * la
2025-07-01 05:46:54.912 btags += ' ' * lb
2025-07-01 05:46:54.919 else:
2025-07-01 05:46:54.927 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:54.937 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:54.945 else:
2025-07-01 05:46:54.952 # the synch pair is identical
2025-07-01 05:46:54.958 yield ' ' + aelt
2025-07-01 05:46:54.964
2025-07-01 05:46:54.970 # pump out diffs from after the synch point
2025-07-01 05:46:54.976 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:54.982
2025-07-01 05:46:54.987 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:54.995 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:55.006
2025-07-01 05:46:55.015 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:55.025 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:55.033 alo = 194, ahi = 1101
2025-07-01 05:46:55.040 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:55.049 blo = 194, bhi = 1101
2025-07-01 05:46:55.058
2025-07-01 05:46:55.064 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:55.071 g = []
2025-07-01 05:46:55.080 if alo < ahi:
2025-07-01 05:46:55.092 if blo < bhi:
2025-07-01 05:46:55.100 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:55.107 else:
2025-07-01 05:46:55.114 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:55.119 elif blo < bhi:
2025-07-01 05:46:55.125 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:55.131
2025-07-01 05:46:55.137 > yield from g
2025-07-01 05:46:55.143
2025-07-01 05:46:55.150 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:55.157 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:55.163
2025-07-01 05:46:55.170 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:55.178 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:55.185 alo = 194, ahi = 1101
2025-07-01 05:46:55.198 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:55.210 blo = 194, bhi = 1101
2025-07-01 05:46:55.220
2025-07-01 05:46:55.230 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:55.243 r"""
2025-07-01 05:46:55.254 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:55.262 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:55.269 synch point, and intraline difference marking is done on the
2025-07-01 05:46:55.281 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:55.292
2025-07-01 05:46:55.301 Example:
2025-07-01 05:46:55.308
2025-07-01 05:46:55.314 >>> d = Differ()
2025-07-01 05:46:55.319 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:55.324 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:55.329 >>> print(''.join(results), end="")
2025-07-01 05:46:55.335 - abcDefghiJkl
2025-07-01 05:46:55.348 + abcdefGhijkl
2025-07-01 05:46:55.361 """
2025-07-01 05:46:55.367
2025-07-01 05:46:55.375 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:55.385 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:55.394 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:55.401 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:55.407 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:55.412
2025-07-01 05:46:55.419 # search for the pair that matches best without being identical
2025-07-01 05:46:55.425 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:55.431 # on junk -- unless we have to)
2025-07-01 05:46:55.439 for j in range(blo, bhi):
2025-07-01 05:46:55.451 bj = b[j]
2025-07-01 05:46:55.458 cruncher.set_seq2(bj)
2025-07-01 05:46:55.464 for i in range(alo, ahi):
2025-07-01 05:46:55.471 ai = a[i]
2025-07-01 05:46:55.477 if ai == bj:
2025-07-01 05:46:55.483 if eqi is None:
2025-07-01 05:46:55.494 eqi, eqj = i, j
2025-07-01 05:46:55.503 continue
2025-07-01 05:46:55.512 cruncher.set_seq1(ai)
2025-07-01 05:46:55.519 # computing similarity is expensive, so use the quick
2025-07-01 05:46:55.526 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:55.533 # compares by a factor of 3.
2025-07-01 05:46:55.540 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:55.547 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:55.554 # of the computation is cached by cruncher
2025-07-01 05:46:55.560 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:55.567 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:55.579 cruncher.ratio() > best_ratio:
2025-07-01 05:46:55.587 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:55.593 if best_ratio < cutoff:
2025-07-01 05:46:55.599 # no non-identical "pretty close" pair
2025-07-01 05:46:55.605 if eqi is None:
2025-07-01 05:46:55.616 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:55.628 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:55.637 return
2025-07-01 05:46:55.644 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:55.650 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:55.654 else:
2025-07-01 05:46:55.660 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:55.665 eqi = None
2025-07-01 05:46:55.671
2025-07-01 05:46:55.676 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:55.682 # identical
2025-07-01 05:46:55.690
2025-07-01 05:46:55.698 # pump out diffs from before the synch point
2025-07-01 05:46:55.704 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:55.711
2025-07-01 05:46:55.717 # do intraline marking on the synch pair
2025-07-01 05:46:55.723 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:55.728 if eqi is None:
2025-07-01 05:46:55.734 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:55.741 atags = btags = ""
2025-07-01 05:46:55.748 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:55.754 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:55.760 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:55.767 if tag == 'replace':
2025-07-01 05:46:55.772 atags += '^' * la
2025-07-01 05:46:55.778 btags += '^' * lb
2025-07-01 05:46:55.784 elif tag == 'delete':
2025-07-01 05:46:55.789 atags += '-' * la
2025-07-01 05:46:55.795 elif tag == 'insert':
2025-07-01 05:46:55.800 btags += '+' * lb
2025-07-01 05:46:55.807 elif tag == 'equal':
2025-07-01 05:46:55.813 atags += ' ' * la
2025-07-01 05:46:55.818 btags += ' ' * lb
2025-07-01 05:46:55.823 else:
2025-07-01 05:46:55.828 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:55.832 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:55.837 else:
2025-07-01 05:46:55.842 # the synch pair is identical
2025-07-01 05:46:55.848 yield ' ' + aelt
2025-07-01 05:46:55.854
2025-07-01 05:46:55.860 # pump out diffs from after the synch point
2025-07-01 05:46:55.869 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:55.877
2025-07-01 05:46:55.885 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:55.891 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:55.896
2025-07-01 05:46:55.901 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:55.906 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:55.911 alo = 195, ahi = 1101
2025-07-01 05:46:55.918 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:55.924 blo = 195, bhi = 1101
2025-07-01 05:46:55.929
2025-07-01 05:46:55.935 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:55.940 g = []
2025-07-01 05:46:55.945 if alo < ahi:
2025-07-01 05:46:55.951 if blo < bhi:
2025-07-01 05:46:55.958 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:55.968 else:
2025-07-01 05:46:55.977 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:55.984 elif blo < bhi:
2025-07-01 05:46:55.990 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:55.995
2025-07-01 05:46:56.000 > yield from g
2025-07-01 05:46:56.004
2025-07-01 05:46:56.009 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:56.015 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:56.020
2025-07-01 05:46:56.027 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:56.035 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:56.040 alo = 195, ahi = 1101
2025-07-01 05:46:56.047 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:56.054 blo = 195, bhi = 1101
2025-07-01 05:46:56.065
2025-07-01 05:46:56.073 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:56.079 r"""
2025-07-01 05:46:56.087 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:56.097 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:56.106 synch point, and intraline difference marking is done on the
2025-07-01 05:46:56.116 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:56.127
2025-07-01 05:46:56.136 Example:
2025-07-01 05:46:56.144
2025-07-01 05:46:56.151 >>> d = Differ()
2025-07-01 05:46:56.158 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:56.164 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:56.170 >>> print(''.join(results), end="")
2025-07-01 05:46:56.175 - abcDefghiJkl
2025-07-01 05:46:56.186 + abcdefGhijkl
2025-07-01 05:46:56.199 """
2025-07-01 05:46:56.208
2025-07-01 05:46:56.220 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:56.230 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:56.239 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:56.248 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:56.256 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:56.262
2025-07-01 05:46:56.273 # search for the pair that matches best without being identical
2025-07-01 05:46:56.281 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:56.289 # on junk -- unless we have to)
2025-07-01 05:46:56.295 for j in range(blo, bhi):
2025-07-01 05:46:56.302 bj = b[j]
2025-07-01 05:46:56.313 cruncher.set_seq2(bj)
2025-07-01 05:46:56.322 for i in range(alo, ahi):
2025-07-01 05:46:56.332 ai = a[i]
2025-07-01 05:46:56.342 if ai == bj:
2025-07-01 05:46:56.354 if eqi is None:
2025-07-01 05:46:56.362 eqi, eqj = i, j
2025-07-01 05:46:56.373 continue
2025-07-01 05:46:56.378 cruncher.set_seq1(ai)
2025-07-01 05:46:56.385 # computing similarity is expensive, so use the quick
2025-07-01 05:46:56.395 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:56.401 # compares by a factor of 3.
2025-07-01 05:46:56.408 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:56.415 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:56.423 # of the computation is cached by cruncher
2025-07-01 05:46:56.431 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:56.442 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:56.450 cruncher.ratio() > best_ratio:
2025-07-01 05:46:56.456 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:56.462 if best_ratio < cutoff:
2025-07-01 05:46:56.467 # no non-identical "pretty close" pair
2025-07-01 05:46:56.473 if eqi is None:
2025-07-01 05:46:56.480 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:56.487 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:56.494 return
2025-07-01 05:46:56.501 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:56.511 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:56.523 else:
2025-07-01 05:46:56.530 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:56.537 eqi = None
2025-07-01 05:46:56.544
2025-07-01 05:46:56.550 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:56.557 # identical
2025-07-01 05:46:56.563
2025-07-01 05:46:56.569 # pump out diffs from before the synch point
2025-07-01 05:46:56.575 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:56.586
2025-07-01 05:46:56.598 # do intraline marking on the synch pair
2025-07-01 05:46:56.607 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:56.615 if eqi is None:
2025-07-01 05:46:56.621 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:56.628 atags = btags = ""
2025-07-01 05:46:56.634 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:56.640 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:56.646 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:56.651 if tag == 'replace':
2025-07-01 05:46:56.658 atags += '^' * la
2025-07-01 05:46:56.665 btags += '^' * lb
2025-07-01 05:46:56.672 elif tag == 'delete':
2025-07-01 05:46:56.678 atags += '-' * la
2025-07-01 05:46:56.684 elif tag == 'insert':
2025-07-01 05:46:56.690 btags += '+' * lb
2025-07-01 05:46:56.696 elif tag == 'equal':
2025-07-01 05:46:56.702 atags += ' ' * la
2025-07-01 05:46:56.708 btags += ' ' * lb
2025-07-01 05:46:56.714 else:
2025-07-01 05:46:56.720 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:56.725 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:56.731 else:
2025-07-01 05:46:56.738 # the synch pair is identical
2025-07-01 05:46:56.744 yield ' ' + aelt
2025-07-01 05:46:56.750
2025-07-01 05:46:56.756 # pump out diffs from after the synch point
2025-07-01 05:46:56.761 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:56.767
2025-07-01 05:46:56.772 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:56.778 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:56.783
2025-07-01 05:46:56.789 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:56.795 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:56.801 alo = 196, ahi = 1101
2025-07-01 05:46:56.807 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:56.813 blo = 196, bhi = 1101
2025-07-01 05:46:56.821
2025-07-01 05:46:56.831 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:56.837 g = []
2025-07-01 05:46:56.844 if alo < ahi:
2025-07-01 05:46:56.850 if blo < bhi:
2025-07-01 05:46:56.861 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:56.870 else:
2025-07-01 05:46:56.880 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:56.890 elif blo < bhi:
2025-07-01 05:46:56.898 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:56.905
2025-07-01 05:46:56.909 > yield from g
2025-07-01 05:46:56.914
2025-07-01 05:46:56.918 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:56.923 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:56.927
2025-07-01 05:46:56.931 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:56.936 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:56.941 alo = 196, ahi = 1101
2025-07-01 05:46:56.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:56.950 blo = 196, bhi = 1101
2025-07-01 05:46:56.954
2025-07-01 05:46:56.959 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:56.963 r"""
2025-07-01 05:46:56.968 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:56.973 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:56.977 synch point, and intraline difference marking is done on the
2025-07-01 05:46:56.982 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:56.986
2025-07-01 05:46:56.992 Example:
2025-07-01 05:46:56.998
2025-07-01 05:46:57.008 >>> d = Differ()
2025-07-01 05:46:57.018 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:57.025 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:57.031 >>> print(''.join(results), end="")
2025-07-01 05:46:57.038 - abcDefghiJkl
2025-07-01 05:46:57.057 + abcdefGhijkl
2025-07-01 05:46:57.079 """
2025-07-01 05:46:57.087
2025-07-01 05:46:57.096 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:57.112 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:57.122 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:57.130 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:57.136 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:57.142
2025-07-01 05:46:57.147 # search for the pair that matches best without being identical
2025-07-01 05:46:57.152 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:57.156 # on junk -- unless we have to)
2025-07-01 05:46:57.162 for j in range(blo, bhi):
2025-07-01 05:46:57.169 bj = b[j]
2025-07-01 05:46:57.177 cruncher.set_seq2(bj)
2025-07-01 05:46:57.183 for i in range(alo, ahi):
2025-07-01 05:46:57.188 ai = a[i]
2025-07-01 05:46:57.194 if ai == bj:
2025-07-01 05:46:57.200 if eqi is None:
2025-07-01 05:46:57.207 eqi, eqj = i, j
2025-07-01 05:46:57.219 continue
2025-07-01 05:46:57.231 cruncher.set_seq1(ai)
2025-07-01 05:46:57.242 # computing similarity is expensive, so use the quick
2025-07-01 05:46:57.251 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:57.261 # compares by a factor of 3.
2025-07-01 05:46:57.268 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:57.274 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:57.287 # of the computation is cached by cruncher
2025-07-01 05:46:57.295 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:57.304 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:57.312 cruncher.ratio() > best_ratio:
2025-07-01 05:46:57.320 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:57.328 if best_ratio < cutoff:
2025-07-01 05:46:57.335 # no non-identical "pretty close" pair
2025-07-01 05:46:57.342 if eqi is None:
2025-07-01 05:46:57.352 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:57.364 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:57.373 return
2025-07-01 05:46:57.380 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:57.387 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:57.395 else:
2025-07-01 05:46:57.406 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:57.417 eqi = None
2025-07-01 05:46:57.427
2025-07-01 05:46:57.435 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:57.443 # identical
2025-07-01 05:46:57.450
2025-07-01 05:46:57.457 # pump out diffs from before the synch point
2025-07-01 05:46:57.471 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:57.483
2025-07-01 05:46:57.497 # do intraline marking on the synch pair
2025-07-01 05:46:57.511 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:57.520 if eqi is None:
2025-07-01 05:46:57.528 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:57.535 atags = btags = ""
2025-07-01 05:46:57.542 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:57.551 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:57.563 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:57.573 if tag == 'replace':
2025-07-01 05:46:57.584 atags += '^' * la
2025-07-01 05:46:57.598 btags += '^' * lb
2025-07-01 05:46:57.607 elif tag == 'delete':
2025-07-01 05:46:57.615 atags += '-' * la
2025-07-01 05:46:57.622 elif tag == 'insert':
2025-07-01 05:46:57.633 btags += '+' * lb
2025-07-01 05:46:57.642 elif tag == 'equal':
2025-07-01 05:46:57.651 atags += ' ' * la
2025-07-01 05:46:57.661 btags += ' ' * lb
2025-07-01 05:46:57.675 else:
2025-07-01 05:46:57.686 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:57.697 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:57.705 else:
2025-07-01 05:46:57.713 # the synch pair is identical
2025-07-01 05:46:57.720 yield ' ' + aelt
2025-07-01 05:46:57.727
2025-07-01 05:46:57.733 # pump out diffs from after the synch point
2025-07-01 05:46:57.744 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:57.754
2025-07-01 05:46:57.764 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:57.774 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:57.786
2025-07-01 05:46:57.797 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:57.808 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:57.816 alo = 197, ahi = 1101
2025-07-01 05:46:57.829 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:57.839 blo = 197, bhi = 1101
2025-07-01 05:46:57.846
2025-07-01 05:46:57.855 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:57.866 g = []
2025-07-01 05:46:57.877 if alo < ahi:
2025-07-01 05:46:57.892 if blo < bhi:
2025-07-01 05:46:57.905 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:57.915 else:
2025-07-01 05:46:57.922 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:57.929 elif blo < bhi:
2025-07-01 05:46:57.936 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:57.943
2025-07-01 05:46:57.950 > yield from g
2025-07-01 05:46:57.957
2025-07-01 05:46:57.970 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:57.981 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:57.990
2025-07-01 05:46:58.003 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:58.017 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:58.029 alo = 197, ahi = 1101
2025-07-01 05:46:58.039 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:58.048 blo = 197, bhi = 1101
2025-07-01 05:46:58.055
2025-07-01 05:46:58.062 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:58.069 r"""
2025-07-01 05:46:58.075 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:58.082 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:58.089 synch point, and intraline difference marking is done on the
2025-07-01 05:46:58.095 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:58.101
2025-07-01 05:46:58.108 Example:
2025-07-01 05:46:58.115
2025-07-01 05:46:58.128 >>> d = Differ()
2025-07-01 05:46:58.138 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:58.148 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:58.160 >>> print(''.join(results), end="")
2025-07-01 05:46:58.170 - abcDefghiJkl
2025-07-01 05:46:58.191 + abcdefGhijkl
2025-07-01 05:46:58.213 """
2025-07-01 05:46:58.221
2025-07-01 05:46:58.227 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:58.236 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:58.247 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:58.256 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:58.264 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:58.271
2025-07-01 05:46:58.277 # search for the pair that matches best without being identical
2025-07-01 05:46:58.286 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:58.293 # on junk -- unless we have to)
2025-07-01 05:46:58.300 for j in range(blo, bhi):
2025-07-01 05:46:58.307 bj = b[j]
2025-07-01 05:46:58.312 cruncher.set_seq2(bj)
2025-07-01 05:46:58.318 for i in range(alo, ahi):
2025-07-01 05:46:58.325 ai = a[i]
2025-07-01 05:46:58.331 if ai == bj:
2025-07-01 05:46:58.337 if eqi is None:
2025-07-01 05:46:58.343 eqi, eqj = i, j
2025-07-01 05:46:58.355 continue
2025-07-01 05:46:58.368 cruncher.set_seq1(ai)
2025-07-01 05:46:58.381 # computing similarity is expensive, so use the quick
2025-07-01 05:46:58.396 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:58.407 # compares by a factor of 3.
2025-07-01 05:46:58.414 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:58.424 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:58.435 # of the computation is cached by cruncher
2025-07-01 05:46:58.445 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:58.453 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:58.461 cruncher.ratio() > best_ratio:
2025-07-01 05:46:58.468 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:58.475 if best_ratio < cutoff:
2025-07-01 05:46:58.483 # no non-identical "pretty close" pair
2025-07-01 05:46:58.492 if eqi is None:
2025-07-01 05:46:58.506 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:58.518 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:58.531 return
2025-07-01 05:46:58.545 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:58.553 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:58.565 else:
2025-07-01 05:46:58.575 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:58.583 eqi = None
2025-07-01 05:46:58.591
2025-07-01 05:46:58.597 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:58.603 # identical
2025-07-01 05:46:58.615
2025-07-01 05:46:58.623 # pump out diffs from before the synch point
2025-07-01 05:46:58.631 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:58.638
2025-07-01 05:46:58.647 # do intraline marking on the synch pair
2025-07-01 05:46:58.659 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:58.667 if eqi is None:
2025-07-01 05:46:58.675 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:58.685 atags = btags = ""
2025-07-01 05:46:58.695 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:58.706 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:58.715 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:58.723 if tag == 'replace':
2025-07-01 05:46:58.734 atags += '^' * la
2025-07-01 05:46:58.742 btags += '^' * lb
2025-07-01 05:46:58.749 elif tag == 'delete':
2025-07-01 05:46:58.763 atags += '-' * la
2025-07-01 05:46:58.774 elif tag == 'insert':
2025-07-01 05:46:58.785 btags += '+' * lb
2025-07-01 05:46:58.795 elif tag == 'equal':
2025-07-01 05:46:58.803 atags += ' ' * la
2025-07-01 05:46:58.813 btags += ' ' * lb
2025-07-01 05:46:58.823 else:
2025-07-01 05:46:58.831 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:58.839 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:58.849 else:
2025-07-01 05:46:58.858 # the synch pair is identical
2025-07-01 05:46:58.869 yield ' ' + aelt
2025-07-01 05:46:58.880
2025-07-01 05:46:58.888 # pump out diffs from after the synch point
2025-07-01 05:46:58.896 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:58.903
2025-07-01 05:46:58.909 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:58.914 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:58.920
2025-07-01 05:46:58.925 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:58.931 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:58.937 alo = 198, ahi = 1101
2025-07-01 05:46:58.943 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:58.949 blo = 198, bhi = 1101
2025-07-01 05:46:58.955
2025-07-01 05:46:58.962 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:58.969 g = []
2025-07-01 05:46:58.976 if alo < ahi:
2025-07-01 05:46:58.983 if blo < bhi:
2025-07-01 05:46:58.990 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:58.998 else:
2025-07-01 05:46:59.006 g = self._dump('-', a, alo, ahi)
2025-07-01 05:46:59.013 elif blo < bhi:
2025-07-01 05:46:59.021 g = self._dump('+', b, blo, bhi)
2025-07-01 05:46:59.028
2025-07-01 05:46:59.035 > yield from g
2025-07-01 05:46:59.042
2025-07-01 05:46:59.050 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:46:59.057 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:59.064
2025-07-01 05:46:59.071 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:59.078 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:46:59.085 alo = 198, ahi = 1101
2025-07-01 05:46:59.094 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:46:59.102 blo = 198, bhi = 1101
2025-07-01 05:46:59.109
2025-07-01 05:46:59.117 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:46:59.125 r"""
2025-07-01 05:46:59.133 When replacing one block of lines with another, search the blocks
2025-07-01 05:46:59.141 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:46:59.149 synch point, and intraline difference marking is done on the
2025-07-01 05:46:59.157 similar pair. Lots of work, but often worth it.
2025-07-01 05:46:59.165
2025-07-01 05:46:59.177 Example:
2025-07-01 05:46:59.187
2025-07-01 05:46:59.196 >>> d = Differ()
2025-07-01 05:46:59.210 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:46:59.219 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:46:59.227 >>> print(''.join(results), end="")
2025-07-01 05:46:59.234 - abcDefghiJkl
2025-07-01 05:46:59.250 + abcdefGhijkl
2025-07-01 05:46:59.267 """
2025-07-01 05:46:59.276
2025-07-01 05:46:59.285 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:46:59.293 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:46:59.302 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:46:59.311 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:46:59.323 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:46:59.336
2025-07-01 05:46:59.346 # search for the pair that matches best without being identical
2025-07-01 05:46:59.359 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:46:59.370 # on junk -- unless we have to)
2025-07-01 05:46:59.379 for j in range(blo, bhi):
2025-07-01 05:46:59.386 bj = b[j]
2025-07-01 05:46:59.395 cruncher.set_seq2(bj)
2025-07-01 05:46:59.403 for i in range(alo, ahi):
2025-07-01 05:46:59.411 ai = a[i]
2025-07-01 05:46:59.420 if ai == bj:
2025-07-01 05:46:59.429 if eqi is None:
2025-07-01 05:46:59.440 eqi, eqj = i, j
2025-07-01 05:46:59.453 continue
2025-07-01 05:46:59.463 cruncher.set_seq1(ai)
2025-07-01 05:46:59.472 # computing similarity is expensive, so use the quick
2025-07-01 05:46:59.479 # upper bounds first -- have seen this speed up messy
2025-07-01 05:46:59.487 # compares by a factor of 3.
2025-07-01 05:46:59.495 # note that ratio() is only expensive to compute the first
2025-07-01 05:46:59.503 # time it's called on a sequence pair; the expensive part
2025-07-01 05:46:59.519 # of the computation is cached by cruncher
2025-07-01 05:46:59.531 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:46:59.540 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:46:59.549 cruncher.ratio() > best_ratio:
2025-07-01 05:46:59.559 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:46:59.569 if best_ratio < cutoff:
2025-07-01 05:46:59.577 # no non-identical "pretty close" pair
2025-07-01 05:46:59.584 if eqi is None:
2025-07-01 05:46:59.590 # no identical pair either -- treat it as a straight replace
2025-07-01 05:46:59.596 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:46:59.602 return
2025-07-01 05:46:59.612 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:46:59.624 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:46:59.632 else:
2025-07-01 05:46:59.641 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:46:59.648 eqi = None
2025-07-01 05:46:59.661
2025-07-01 05:46:59.673 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:46:59.684 # identical
2025-07-01 05:46:59.696
2025-07-01 05:46:59.706 # pump out diffs from before the synch point
2025-07-01 05:46:59.715 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:46:59.727
2025-07-01 05:46:59.736 # do intraline marking on the synch pair
2025-07-01 05:46:59.744 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:46:59.752 if eqi is None:
2025-07-01 05:46:59.758 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:46:59.768 atags = btags = ""
2025-07-01 05:46:59.778 cruncher.set_seqs(aelt, belt)
2025-07-01 05:46:59.786 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:46:59.793 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:46:59.802 if tag == 'replace':
2025-07-01 05:46:59.809 atags += '^' * la
2025-07-01 05:46:59.815 btags += '^' * lb
2025-07-01 05:46:59.820 elif tag == 'delete':
2025-07-01 05:46:59.828 atags += '-' * la
2025-07-01 05:46:59.840 elif tag == 'insert':
2025-07-01 05:46:59.851 btags += '+' * lb
2025-07-01 05:46:59.860 elif tag == 'equal':
2025-07-01 05:46:59.868 atags += ' ' * la
2025-07-01 05:46:59.875 btags += ' ' * lb
2025-07-01 05:46:59.881 else:
2025-07-01 05:46:59.887 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:46:59.893 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:46:59.899 else:
2025-07-01 05:46:59.905 # the synch pair is identical
2025-07-01 05:46:59.910 yield ' ' + aelt
2025-07-01 05:46:59.915
2025-07-01 05:46:59.923 # pump out diffs from after the synch point
2025-07-01 05:46:59.933 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:46:59.943
2025-07-01 05:46:59.951 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:46:59.959 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:46:59.970
2025-07-01 05:46:59.983 self = <difflib.Differ object at [hex]>
2025-07-01 05:46:59.995 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:00.016 alo = 199, ahi = 1101
2025-07-01 05:47:00.027 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:00.035 blo = 199, bhi = 1101
2025-07-01 05:47:00.042
2025-07-01 05:47:00.049 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:00.055 g = []
2025-07-01 05:47:00.061 if alo < ahi:
2025-07-01 05:47:00.066 if blo < bhi:
2025-07-01 05:47:00.071 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:00.076 else:
2025-07-01 05:47:00.082 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:00.088 elif blo < bhi:
2025-07-01 05:47:00.095 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:00.102
2025-07-01 05:47:00.109 > yield from g
2025-07-01 05:47:00.118
2025-07-01 05:47:00.125 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:00.132 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:00.139
2025-07-01 05:47:00.147 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:00.153 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:00.159 alo = 199, ahi = 1101
2025-07-01 05:47:00.167 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:00.173 blo = 199, bhi = 1101
2025-07-01 05:47:00.178
2025-07-01 05:47:00.190 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:00.199 r"""
2025-07-01 05:47:00.206 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:00.214 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:00.222 synch point, and intraline difference marking is done on the
2025-07-01 05:47:00.233 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:00.242
2025-07-01 05:47:00.253 Example:
2025-07-01 05:47:00.263
2025-07-01 05:47:00.271 >>> d = Differ()
2025-07-01 05:47:00.278 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:00.288 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:00.301 >>> print(''.join(results), end="")
2025-07-01 05:47:00.309 - abcDefghiJkl
2025-07-01 05:47:00.324 + abcdefGhijkl
2025-07-01 05:47:00.343 """
2025-07-01 05:47:00.354
2025-07-01 05:47:00.363 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:00.371 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:00.378 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:00.389 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:00.400 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:00.412
2025-07-01 05:47:00.423 # search for the pair that matches best without being identical
2025-07-01 05:47:00.434 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:00.444 # on junk -- unless we have to)
2025-07-01 05:47:00.452 for j in range(blo, bhi):
2025-07-01 05:47:00.459 bj = b[j]
2025-07-01 05:47:00.465 cruncher.set_seq2(bj)
2025-07-01 05:47:00.470 for i in range(alo, ahi):
2025-07-01 05:47:00.478 ai = a[i]
2025-07-01 05:47:00.486 if ai == bj:
2025-07-01 05:47:00.492 if eqi is None:
2025-07-01 05:47:00.498 eqi, eqj = i, j
2025-07-01 05:47:00.505 continue
2025-07-01 05:47:00.513 cruncher.set_seq1(ai)
2025-07-01 05:47:00.520 # computing similarity is expensive, so use the quick
2025-07-01 05:47:00.527 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:00.532 # compares by a factor of 3.
2025-07-01 05:47:00.538 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:00.544 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:00.550 # of the computation is cached by cruncher
2025-07-01 05:47:00.556 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:00.562 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:00.568 cruncher.ratio() > best_ratio:
2025-07-01 05:47:00.574 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:00.586 if best_ratio < cutoff:
2025-07-01 05:47:00.593 # no non-identical "pretty close" pair
2025-07-01 05:47:00.598 if eqi is None:
2025-07-01 05:47:00.610 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:00.621 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:00.631 return
2025-07-01 05:47:00.641 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:00.649 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:00.657 else:
2025-07-01 05:47:00.665 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:00.671 eqi = None
2025-07-01 05:47:00.677
2025-07-01 05:47:00.685 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:00.691 # identical
2025-07-01 05:47:00.697
2025-07-01 05:47:00.704 # pump out diffs from before the synch point
2025-07-01 05:47:00.711 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:00.717
2025-07-01 05:47:00.724 # do intraline marking on the synch pair
2025-07-01 05:47:00.731 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:00.737 if eqi is None:
2025-07-01 05:47:00.748 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:00.757 atags = btags = ""
2025-07-01 05:47:00.765 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:00.771 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:00.776 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:00.781 if tag == 'replace':
2025-07-01 05:47:00.786 atags += '^' * la
2025-07-01 05:47:00.790 btags += '^' * lb
2025-07-01 05:47:00.795 elif tag == 'delete':
2025-07-01 05:47:00.800 atags += '-' * la
2025-07-01 05:47:00.806 elif tag == 'insert':
2025-07-01 05:47:00.816 btags += '+' * lb
2025-07-01 05:47:00.825 elif tag == 'equal':
2025-07-01 05:47:00.833 atags += ' ' * la
2025-07-01 05:47:00.841 btags += ' ' * lb
2025-07-01 05:47:00.847 else:
2025-07-01 05:47:00.854 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:00.864 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:00.872 else:
2025-07-01 05:47:00.879 # the synch pair is identical
2025-07-01 05:47:00.885 yield ' ' + aelt
2025-07-01 05:47:00.890
2025-07-01 05:47:00.900 # pump out diffs from after the synch point
2025-07-01 05:47:00.910 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:00.918
2025-07-01 05:47:00.924 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:00.937 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:00.949
2025-07-01 05:47:00.960 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:00.969 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:00.975 alo = 202, ahi = 1101
2025-07-01 05:47:00.983 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:00.990 blo = 202, bhi = 1101
2025-07-01 05:47:01.002
2025-07-01 05:47:01.012 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:01.019 g = []
2025-07-01 05:47:01.027 if alo < ahi:
2025-07-01 05:47:01.037 if blo < bhi:
2025-07-01 05:47:01.049 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:01.063 else:
2025-07-01 05:47:01.073 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:01.080 elif blo < bhi:
2025-07-01 05:47:01.087 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:01.097
2025-07-01 05:47:01.110 > yield from g
2025-07-01 05:47:01.119
2025-07-01 05:47:01.127 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:01.138 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:01.148
2025-07-01 05:47:01.157 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:01.166 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:01.172 alo = 202, ahi = 1101
2025-07-01 05:47:01.181 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:01.188 blo = 202, bhi = 1101
2025-07-01 05:47:01.194
2025-07-01 05:47:01.203 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:01.214 r"""
2025-07-01 05:47:01.226 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:01.237 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:01.247 synch point, and intraline difference marking is done on the
2025-07-01 05:47:01.256 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:01.263
2025-07-01 05:47:01.273 Example:
2025-07-01 05:47:01.282
2025-07-01 05:47:01.291 >>> d = Differ()
2025-07-01 05:47:01.299 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:01.306 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:01.312 >>> print(''.join(results), end="")
2025-07-01 05:47:01.318 - abcDefghiJkl
2025-07-01 05:47:01.343 + abcdefGhijkl
2025-07-01 05:47:01.361 """
2025-07-01 05:47:01.367
2025-07-01 05:47:01.374 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:01.383 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:01.389 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:01.394 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:01.399 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:01.403
2025-07-01 05:47:01.408 # search for the pair that matches best without being identical
2025-07-01 05:47:01.412 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:01.417 # on junk -- unless we have to)
2025-07-01 05:47:01.422 for j in range(blo, bhi):
2025-07-01 05:47:01.427 bj = b[j]
2025-07-01 05:47:01.433 cruncher.set_seq2(bj)
2025-07-01 05:47:01.439 for i in range(alo, ahi):
2025-07-01 05:47:01.446 ai = a[i]
2025-07-01 05:47:01.452 if ai == bj:
2025-07-01 05:47:01.458 if eqi is None:
2025-07-01 05:47:01.464 eqi, eqj = i, j
2025-07-01 05:47:01.470 continue
2025-07-01 05:47:01.477 cruncher.set_seq1(ai)
2025-07-01 05:47:01.483 # computing similarity is expensive, so use the quick
2025-07-01 05:47:01.489 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:01.495 # compares by a factor of 3.
2025-07-01 05:47:01.501 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:01.508 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:01.514 # of the computation is cached by cruncher
2025-07-01 05:47:01.520 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:01.526 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:01.532 cruncher.ratio() > best_ratio:
2025-07-01 05:47:01.538 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:01.544 if best_ratio < cutoff:
2025-07-01 05:47:01.550 # no non-identical "pretty close" pair
2025-07-01 05:47:01.556 if eqi is None:
2025-07-01 05:47:01.562 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:01.568 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:01.574 return
2025-07-01 05:47:01.580 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:01.586 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:01.592 else:
2025-07-01 05:47:01.598 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:01.607 eqi = None
2025-07-01 05:47:01.617
2025-07-01 05:47:01.624 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:01.631 # identical
2025-07-01 05:47:01.641
2025-07-01 05:47:01.649 # pump out diffs from before the synch point
2025-07-01 05:47:01.662 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:01.672
2025-07-01 05:47:01.684 # do intraline marking on the synch pair
2025-07-01 05:47:01.693 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:01.701 if eqi is None:
2025-07-01 05:47:01.709 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:01.715 atags = btags = ""
2025-07-01 05:47:01.721 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:01.734 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:01.742 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:01.748 if tag == 'replace':
2025-07-01 05:47:01.754 atags += '^' * la
2025-07-01 05:47:01.762 btags += '^' * lb
2025-07-01 05:47:01.768 elif tag == 'delete':
2025-07-01 05:47:01.774 atags += '-' * la
2025-07-01 05:47:01.780 elif tag == 'insert':
2025-07-01 05:47:01.786 btags += '+' * lb
2025-07-01 05:47:01.797 elif tag == 'equal':
2025-07-01 05:47:01.809 atags += ' ' * la
2025-07-01 05:47:01.824 btags += ' ' * lb
2025-07-01 05:47:01.833 else:
2025-07-01 05:47:01.840 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:01.846 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:01.852 else:
2025-07-01 05:47:01.858 # the synch pair is identical
2025-07-01 05:47:01.863 yield ' ' + aelt
2025-07-01 05:47:01.871
2025-07-01 05:47:01.882 # pump out diffs from after the synch point
2025-07-01 05:47:01.893 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:01.903
2025-07-01 05:47:01.909 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:01.915 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:01.921
2025-07-01 05:47:01.927 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:01.933 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:01.938 alo = 203, ahi = 1101
2025-07-01 05:47:01.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:01.950 blo = 203, bhi = 1101
2025-07-01 05:47:01.956
2025-07-01 05:47:01.962 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:01.968 g = []
2025-07-01 05:47:01.974 if alo < ahi:
2025-07-01 05:47:01.980 if blo < bhi:
2025-07-01 05:47:01.987 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:01.994 else:
2025-07-01 05:47:02.000 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:02.005 elif blo < bhi:
2025-07-01 05:47:02.012 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:02.018
2025-07-01 05:47:02.023 > yield from g
2025-07-01 05:47:02.028
2025-07-01 05:47:02.034 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:02.039 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:02.045
2025-07-01 05:47:02.051 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:02.056 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:02.062 alo = 203, ahi = 1101
2025-07-01 05:47:02.072 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:02.080 blo = 203, bhi = 1101
2025-07-01 05:47:02.086
2025-07-01 05:47:02.091 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:02.095 r"""
2025-07-01 05:47:02.101 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:02.106 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:02.111 synch point, and intraline difference marking is done on the
2025-07-01 05:47:02.116 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:02.120
2025-07-01 05:47:02.125 Example:
2025-07-01 05:47:02.129
2025-07-01 05:47:02.133 >>> d = Differ()
2025-07-01 05:47:02.138 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:02.143 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:02.150 >>> print(''.join(results), end="")
2025-07-01 05:47:02.154 - abcDefghiJkl
2025-07-01 05:47:02.163 + abcdefGhijkl
2025-07-01 05:47:02.176 """
2025-07-01 05:47:02.181
2025-07-01 05:47:02.186 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:02.192 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:02.197 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:02.202 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:02.207 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:02.212
2025-07-01 05:47:02.217 # search for the pair that matches best without being identical
2025-07-01 05:47:02.222 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:02.228 # on junk -- unless we have to)
2025-07-01 05:47:02.234 for j in range(blo, bhi):
2025-07-01 05:47:02.241 bj = b[j]
2025-07-01 05:47:02.248 cruncher.set_seq2(bj)
2025-07-01 05:47:02.255 for i in range(alo, ahi):
2025-07-01 05:47:02.262 ai = a[i]
2025-07-01 05:47:02.269 if ai == bj:
2025-07-01 05:47:02.276 if eqi is None:
2025-07-01 05:47:02.283 eqi, eqj = i, j
2025-07-01 05:47:02.291 continue
2025-07-01 05:47:02.298 cruncher.set_seq1(ai)
2025-07-01 05:47:02.304 # computing similarity is expensive, so use the quick
2025-07-01 05:47:02.311 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:02.319 # compares by a factor of 3.
2025-07-01 05:47:02.330 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:02.338 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:02.345 # of the computation is cached by cruncher
2025-07-01 05:47:02.350 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:02.355 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:02.360 cruncher.ratio() > best_ratio:
2025-07-01 05:47:02.365 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:02.370 if best_ratio < cutoff:
2025-07-01 05:47:02.375 # no non-identical "pretty close" pair
2025-07-01 05:47:02.380 if eqi is None:
2025-07-01 05:47:02.386 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:02.391 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:02.401 return
2025-07-01 05:47:02.408 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:02.414 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:02.420 else:
2025-07-01 05:47:02.425 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:02.430 eqi = None
2025-07-01 05:47:02.435
2025-07-01 05:47:02.440 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:02.445 # identical
2025-07-01 05:47:02.449
2025-07-01 05:47:02.454 # pump out diffs from before the synch point
2025-07-01 05:47:02.459 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:02.464
2025-07-01 05:47:02.468 # do intraline marking on the synch pair
2025-07-01 05:47:02.473 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:02.478 if eqi is None:
2025-07-01 05:47:02.483 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:02.487 atags = btags = ""
2025-07-01 05:47:02.492 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:02.498 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:02.509 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:02.519 if tag == 'replace':
2025-07-01 05:47:02.527 atags += '^' * la
2025-07-01 05:47:02.536 btags += '^' * lb
2025-07-01 05:47:02.544 elif tag == 'delete':
2025-07-01 05:47:02.551 atags += '-' * la
2025-07-01 05:47:02.557 elif tag == 'insert':
2025-07-01 05:47:02.563 btags += '+' * lb
2025-07-01 05:47:02.568 elif tag == 'equal':
2025-07-01 05:47:02.574 atags += ' ' * la
2025-07-01 05:47:02.581 btags += ' ' * lb
2025-07-01 05:47:02.592 else:
2025-07-01 05:47:02.598 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:02.603 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:02.608 else:
2025-07-01 05:47:02.613 # the synch pair is identical
2025-07-01 05:47:02.617 yield ' ' + aelt
2025-07-01 05:47:02.623
2025-07-01 05:47:02.627 # pump out diffs from after the synch point
2025-07-01 05:47:02.632 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:02.637
2025-07-01 05:47:02.642 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:02.647 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:02.654
2025-07-01 05:47:02.659 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:02.665 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:02.672 alo = 204, ahi = 1101
2025-07-01 05:47:02.681 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:02.688 blo = 204, bhi = 1101
2025-07-01 05:47:02.694
2025-07-01 05:47:02.700 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:02.707 g = []
2025-07-01 05:47:02.713 if alo < ahi:
2025-07-01 05:47:02.719 if blo < bhi:
2025-07-01 05:47:02.725 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:02.731 else:
2025-07-01 05:47:02.738 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:02.745 elif blo < bhi:
2025-07-01 05:47:02.752 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:02.759
2025-07-01 05:47:02.766 > yield from g
2025-07-01 05:47:02.773
2025-07-01 05:47:02.779 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:02.787 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:02.794
2025-07-01 05:47:02.801 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:02.807 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:02.818 alo = 204, ahi = 1101
2025-07-01 05:47:02.827 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:02.834 blo = 204, bhi = 1101
2025-07-01 05:47:02.846
2025-07-01 05:47:02.857 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:02.864 r"""
2025-07-01 05:47:02.872 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:02.879 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:02.885 synch point, and intraline difference marking is done on the
2025-07-01 05:47:02.891 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:02.897
2025-07-01 05:47:02.903 Example:
2025-07-01 05:47:02.909
2025-07-01 05:47:02.914 >>> d = Differ()
2025-07-01 05:47:02.920 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:02.927 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:02.933 >>> print(''.join(results), end="")
2025-07-01 05:47:02.940 - abcDefghiJkl
2025-07-01 05:47:02.953 + abcdefGhijkl
2025-07-01 05:47:02.967 """
2025-07-01 05:47:02.973
2025-07-01 05:47:02.980 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:02.987 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:02.994 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:03.000 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:03.007 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:03.015
2025-07-01 05:47:03.025 # search for the pair that matches best without being identical
2025-07-01 05:47:03.034 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:03.041 # on junk -- unless we have to)
2025-07-01 05:47:03.048 for j in range(blo, bhi):
2025-07-01 05:47:03.056 bj = b[j]
2025-07-01 05:47:03.063 cruncher.set_seq2(bj)
2025-07-01 05:47:03.069 for i in range(alo, ahi):
2025-07-01 05:47:03.075 ai = a[i]
2025-07-01 05:47:03.082 if ai == bj:
2025-07-01 05:47:03.091 if eqi is None:
2025-07-01 05:47:03.102 eqi, eqj = i, j
2025-07-01 05:47:03.109 continue
2025-07-01 05:47:03.116 cruncher.set_seq1(ai)
2025-07-01 05:47:03.121 # computing similarity is expensive, so use the quick
2025-07-01 05:47:03.126 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:03.131 # compares by a factor of 3.
2025-07-01 05:47:03.137 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:03.143 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:03.148 # of the computation is cached by cruncher
2025-07-01 05:47:03.152 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:03.158 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:03.163 cruncher.ratio() > best_ratio:
2025-07-01 05:47:03.168 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:03.173 if best_ratio < cutoff:
2025-07-01 05:47:03.179 # no non-identical "pretty close" pair
2025-07-01 05:47:03.184 if eqi is None:
2025-07-01 05:47:03.189 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:03.196 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:03.203 return
2025-07-01 05:47:03.210 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:03.216 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:03.222 else:
2025-07-01 05:47:03.228 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:03.238 eqi = None
2025-07-01 05:47:03.251
2025-07-01 05:47:03.265 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:03.274 # identical
2025-07-01 05:47:03.285
2025-07-01 05:47:03.295 # pump out diffs from before the synch point
2025-07-01 05:47:03.304 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:03.312
2025-07-01 05:47:03.319 # do intraline marking on the synch pair
2025-07-01 05:47:03.327 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:03.338 if eqi is None:
2025-07-01 05:47:03.347 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:03.354 atags = btags = ""
2025-07-01 05:47:03.366 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:03.376 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:03.385 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:03.393 if tag == 'replace':
2025-07-01 05:47:03.400 atags += '^' * la
2025-07-01 05:47:03.406 btags += '^' * lb
2025-07-01 05:47:03.411 elif tag == 'delete':
2025-07-01 05:47:03.419 atags += '-' * la
2025-07-01 05:47:03.429 elif tag == 'insert':
2025-07-01 05:47:03.438 btags += '+' * lb
2025-07-01 05:47:03.445 elif tag == 'equal':
2025-07-01 05:47:03.452 atags += ' ' * la
2025-07-01 05:47:03.459 btags += ' ' * lb
2025-07-01 05:47:03.467 else:
2025-07-01 05:47:03.476 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:03.483 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:03.491 else:
2025-07-01 05:47:03.504 # the synch pair is identical
2025-07-01 05:47:03.516 yield ' ' + aelt
2025-07-01 05:47:03.526
2025-07-01 05:47:03.536 # pump out diffs from after the synch point
2025-07-01 05:47:03.543 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:03.549
2025-07-01 05:47:03.554 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:03.559 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:03.563
2025-07-01 05:47:03.568 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:03.575 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:03.584 alo = 205, ahi = 1101
2025-07-01 05:47:03.593 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:03.601 blo = 205, bhi = 1101
2025-07-01 05:47:03.611
2025-07-01 05:47:03.621 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:03.629 g = []
2025-07-01 05:47:03.637 if alo < ahi:
2025-07-01 05:47:03.649 if blo < bhi:
2025-07-01 05:47:03.660 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:03.672 else:
2025-07-01 05:47:03.687 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:03.697 elif blo < bhi:
2025-07-01 05:47:03.705 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:03.712
2025-07-01 05:47:03.719 > yield from g
2025-07-01 05:47:03.729
2025-07-01 05:47:03.739 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:03.748 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:03.755
2025-07-01 05:47:03.762 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:03.771 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:03.781 alo = 205, ahi = 1101
2025-07-01 05:47:03.790 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:03.797 blo = 205, bhi = 1101
2025-07-01 05:47:03.803
2025-07-01 05:47:03.808 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:03.814 r"""
2025-07-01 05:47:03.820 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:03.826 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:03.834 synch point, and intraline difference marking is done on the
2025-07-01 05:47:03.846 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:03.856
2025-07-01 05:47:03.863 Example:
2025-07-01 05:47:03.869
2025-07-01 05:47:03.875 >>> d = Differ()
2025-07-01 05:47:03.882 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:03.891 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:03.901 >>> print(''.join(results), end="")
2025-07-01 05:47:03.909 - abcDefghiJkl
2025-07-01 05:47:03.919 + abcdefGhijkl
2025-07-01 05:47:03.929 """
2025-07-01 05:47:03.935
2025-07-01 05:47:03.941 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:03.947 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:03.952 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:03.958 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:03.964 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:03.970
2025-07-01 05:47:03.976 # search for the pair that matches best without being identical
2025-07-01 05:47:03.982 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:03.988 # on junk -- unless we have to)
2025-07-01 05:47:03.997 for j in range(blo, bhi):
2025-07-01 05:47:04.003 bj = b[j]
2025-07-01 05:47:04.009 cruncher.set_seq2(bj)
2025-07-01 05:47:04.016 for i in range(alo, ahi):
2025-07-01 05:47:04.022 ai = a[i]
2025-07-01 05:47:04.032 if ai == bj:
2025-07-01 05:47:04.042 if eqi is None:
2025-07-01 05:47:04.051 eqi, eqj = i, j
2025-07-01 05:47:04.056 continue
2025-07-01 05:47:04.062 cruncher.set_seq1(ai)
2025-07-01 05:47:04.068 # computing similarity is expensive, so use the quick
2025-07-01 05:47:04.075 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:04.083 # compares by a factor of 3.
2025-07-01 05:47:04.095 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:04.104 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:04.111 # of the computation is cached by cruncher
2025-07-01 05:47:04.116 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:04.121 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:04.132 cruncher.ratio() > best_ratio:
2025-07-01 05:47:04.139 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:04.146 if best_ratio < cutoff:
2025-07-01 05:47:04.151 # no non-identical "pretty close" pair
2025-07-01 05:47:04.157 if eqi is None:
2025-07-01 05:47:04.163 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:04.169 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:04.178 return
2025-07-01 05:47:04.185 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:04.192 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:04.198 else:
2025-07-01 05:47:04.205 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:04.212 eqi = None
2025-07-01 05:47:04.219
2025-07-01 05:47:04.225 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:04.231 # identical
2025-07-01 05:47:04.239
2025-07-01 05:47:04.247 # pump out diffs from before the synch point
2025-07-01 05:47:04.255 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:04.263
2025-07-01 05:47:04.270 # do intraline marking on the synch pair
2025-07-01 05:47:04.277 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:04.284 if eqi is None:
2025-07-01 05:47:04.292 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:04.299 atags = btags = ""
2025-07-01 05:47:04.306 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:04.314 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:04.326 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:04.333 if tag == 'replace':
2025-07-01 05:47:04.339 atags += '^' * la
2025-07-01 05:47:04.346 btags += '^' * lb
2025-07-01 05:47:04.353 elif tag == 'delete':
2025-07-01 05:47:04.359 atags += '-' * la
2025-07-01 05:47:04.366 elif tag == 'insert':
2025-07-01 05:47:04.373 btags += '+' * lb
2025-07-01 05:47:04.380 elif tag == 'equal':
2025-07-01 05:47:04.386 atags += ' ' * la
2025-07-01 05:47:04.398 btags += ' ' * lb
2025-07-01 05:47:04.407 else:
2025-07-01 05:47:04.414 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:04.420 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:04.425 else:
2025-07-01 05:47:04.431 # the synch pair is identical
2025-07-01 05:47:04.438 yield ' ' + aelt
2025-07-01 05:47:04.448
2025-07-01 05:47:04.456 # pump out diffs from after the synch point
2025-07-01 05:47:04.462 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:04.468
2025-07-01 05:47:04.474 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:04.480 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:04.486
2025-07-01 05:47:04.492 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:04.499 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:04.504 alo = 206, ahi = 1101
2025-07-01 05:47:04.511 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:04.516 blo = 206, bhi = 1101
2025-07-01 05:47:04.522
2025-07-01 05:47:04.527 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:04.531 g = []
2025-07-01 05:47:04.536 if alo < ahi:
2025-07-01 05:47:04.541 if blo < bhi:
2025-07-01 05:47:04.546 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:04.551 else:
2025-07-01 05:47:04.557 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:04.564 elif blo < bhi:
2025-07-01 05:47:04.570 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:04.581
2025-07-01 05:47:04.592 > yield from g
2025-07-01 05:47:04.599
2025-07-01 05:47:04.605 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:04.611 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:04.615
2025-07-01 05:47:04.620 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:04.625 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:04.630 alo = 206, ahi = 1101
2025-07-01 05:47:04.635 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:04.640 blo = 206, bhi = 1101
2025-07-01 05:47:04.644
2025-07-01 05:47:04.650 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:04.656 r"""
2025-07-01 05:47:04.662 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:04.669 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:04.675 synch point, and intraline difference marking is done on the
2025-07-01 05:47:04.682 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:04.689
2025-07-01 05:47:04.695 Example:
2025-07-01 05:47:04.703
2025-07-01 05:47:04.714 >>> d = Differ()
2025-07-01 05:47:04.723 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:04.731 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:04.737 >>> print(''.join(results), end="")
2025-07-01 05:47:04.742 - abcDefghiJkl
2025-07-01 05:47:04.751 + abcdefGhijkl
2025-07-01 05:47:04.762 """
2025-07-01 05:47:04.768
2025-07-01 05:47:04.775 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:04.782 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:04.788 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:04.795 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:04.803 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:04.813
2025-07-01 05:47:04.821 # search for the pair that matches best without being identical
2025-07-01 05:47:04.827 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:04.833 # on junk -- unless we have to)
2025-07-01 05:47:04.839 for j in range(blo, bhi):
2025-07-01 05:47:04.844 bj = b[j]
2025-07-01 05:47:04.850 cruncher.set_seq2(bj)
2025-07-01 05:47:04.857 for i in range(alo, ahi):
2025-07-01 05:47:04.864 ai = a[i]
2025-07-01 05:47:04.870 if ai == bj:
2025-07-01 05:47:04.875 if eqi is None:
2025-07-01 05:47:04.881 eqi, eqj = i, j
2025-07-01 05:47:04.886 continue
2025-07-01 05:47:04.893 cruncher.set_seq1(ai)
2025-07-01 05:47:04.903 # computing similarity is expensive, so use the quick
2025-07-01 05:47:04.909 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:04.915 # compares by a factor of 3.
2025-07-01 05:47:04.921 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:04.927 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:04.934 # of the computation is cached by cruncher
2025-07-01 05:47:04.944 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:04.954 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:04.961 cruncher.ratio() > best_ratio:
2025-07-01 05:47:04.967 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:04.973 if best_ratio < cutoff:
2025-07-01 05:47:04.977 # no non-identical "pretty close" pair
2025-07-01 05:47:04.982 if eqi is None:
2025-07-01 05:47:04.987 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:04.992 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:04.998 return
2025-07-01 05:47:05.003 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:05.010 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:05.018 else:
2025-07-01 05:47:05.025 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:05.031 eqi = None
2025-07-01 05:47:05.037
2025-07-01 05:47:05.045 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:05.052 # identical
2025-07-01 05:47:05.059
2025-07-01 05:47:05.065 # pump out diffs from before the synch point
2025-07-01 05:47:05.072 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:05.078
2025-07-01 05:47:05.089 # do intraline marking on the synch pair
2025-07-01 05:47:05.098 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:05.104 if eqi is None:
2025-07-01 05:47:05.113 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:05.125 atags = btags = ""
2025-07-01 05:47:05.137 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:05.149 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:05.160 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:05.171 if tag == 'replace':
2025-07-01 05:47:05.181 atags += '^' * la
2025-07-01 05:47:05.193 btags += '^' * lb
2025-07-01 05:47:05.205 elif tag == 'delete':
2025-07-01 05:47:05.216 atags += '-' * la
2025-07-01 05:47:05.224 elif tag == 'insert':
2025-07-01 05:47:05.231 btags += '+' * lb
2025-07-01 05:47:05.238 elif tag == 'equal':
2025-07-01 05:47:05.244 atags += ' ' * la
2025-07-01 05:47:05.250 btags += ' ' * lb
2025-07-01 05:47:05.255 else:
2025-07-01 05:47:05.263 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:05.275 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:05.284 else:
2025-07-01 05:47:05.292 # the synch pair is identical
2025-07-01 05:47:05.299 yield ' ' + aelt
2025-07-01 05:47:05.305
2025-07-01 05:47:05.310 # pump out diffs from after the synch point
2025-07-01 05:47:05.316 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:05.322
2025-07-01 05:47:05.332 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:05.341 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:05.350
2025-07-01 05:47:05.356 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:05.363 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:05.374 alo = 207, ahi = 1101
2025-07-01 05:47:05.383 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:05.389 blo = 207, bhi = 1101
2025-07-01 05:47:05.396
2025-07-01 05:47:05.402 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:05.410 g = []
2025-07-01 05:47:05.421 if alo < ahi:
2025-07-01 05:47:05.430 if blo < bhi:
2025-07-01 05:47:05.437 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:05.444 else:
2025-07-01 05:47:05.450 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:05.460 elif blo < bhi:
2025-07-01 05:47:05.469 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:05.476
2025-07-01 05:47:05.483 > yield from g
2025-07-01 05:47:05.492
2025-07-01 05:47:05.501 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:05.509 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:05.514
2025-07-01 05:47:05.520 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:05.525 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:05.531 alo = 207, ahi = 1101
2025-07-01 05:47:05.539 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:05.546 blo = 207, bhi = 1101
2025-07-01 05:47:05.553
2025-07-01 05:47:05.564 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:05.576 r"""
2025-07-01 05:47:05.585 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:05.597 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:05.606 synch point, and intraline difference marking is done on the
2025-07-01 05:47:05.612 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:05.618
2025-07-01 05:47:05.625 Example:
2025-07-01 05:47:05.631
2025-07-01 05:47:05.639 >>> d = Differ()
2025-07-01 05:47:05.650 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:05.659 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:05.666 >>> print(''.join(results), end="")
2025-07-01 05:47:05.672 - abcDefghiJkl
2025-07-01 05:47:05.682 + abcdefGhijkl
2025-07-01 05:47:05.708 """
2025-07-01 05:47:05.719
2025-07-01 05:47:05.729 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:05.736 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:05.742 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:05.748 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:05.754 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:05.759
2025-07-01 05:47:05.765 # search for the pair that matches best without being identical
2025-07-01 05:47:05.771 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:05.777 # on junk -- unless we have to)
2025-07-01 05:47:05.783 for j in range(blo, bhi):
2025-07-01 05:47:05.789 bj = b[j]
2025-07-01 05:47:05.796 cruncher.set_seq2(bj)
2025-07-01 05:47:05.802 for i in range(alo, ahi):
2025-07-01 05:47:05.814 ai = a[i]
2025-07-01 05:47:05.826 if ai == bj:
2025-07-01 05:47:05.837 if eqi is None:
2025-07-01 05:47:05.846 eqi, eqj = i, j
2025-07-01 05:47:05.857 continue
2025-07-01 05:47:05.865 cruncher.set_seq1(ai)
2025-07-01 05:47:05.876 # computing similarity is expensive, so use the quick
2025-07-01 05:47:05.886 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:05.897 # compares by a factor of 3.
2025-07-01 05:47:05.906 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:05.912 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:05.918 # of the computation is cached by cruncher
2025-07-01 05:47:05.925 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:05.937 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:05.947 cruncher.ratio() > best_ratio:
2025-07-01 05:47:05.955 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:05.962 if best_ratio < cutoff:
2025-07-01 05:47:05.972 # no non-identical "pretty close" pair
2025-07-01 05:47:05.982 if eqi is None:
2025-07-01 05:47:05.993 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:06.005 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:06.015 return
2025-07-01 05:47:06.023 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:06.030 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:06.037 else:
2025-07-01 05:47:06.044 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:06.050 eqi = None
2025-07-01 05:47:06.055
2025-07-01 05:47:06.063 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:06.071 # identical
2025-07-01 05:47:06.077
2025-07-01 05:47:06.088 # pump out diffs from before the synch point
2025-07-01 05:47:06.099 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:06.110
2025-07-01 05:47:06.121 # do intraline marking on the synch pair
2025-07-01 05:47:06.132 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:06.141 if eqi is None:
2025-07-01 05:47:06.149 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:06.156 atags = btags = ""
2025-07-01 05:47:06.163 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:06.170 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:06.180 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:06.189 if tag == 'replace':
2025-07-01 05:47:06.195 atags += '^' * la
2025-07-01 05:47:06.202 btags += '^' * lb
2025-07-01 05:47:06.208 elif tag == 'delete':
2025-07-01 05:47:06.213 atags += '-' * la
2025-07-01 05:47:06.219 elif tag == 'insert':
2025-07-01 05:47:06.225 btags += '+' * lb
2025-07-01 05:47:06.230 elif tag == 'equal':
2025-07-01 05:47:06.240 atags += ' ' * la
2025-07-01 05:47:06.251 btags += ' ' * lb
2025-07-01 05:47:06.259 else:
2025-07-01 05:47:06.267 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:06.276 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:06.287 else:
2025-07-01 05:47:06.296 # the synch pair is identical
2025-07-01 05:47:06.309 yield ' ' + aelt
2025-07-01 05:47:06.319
2025-07-01 05:47:06.330 # pump out diffs from after the synch point
2025-07-01 05:47:06.342 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:06.354
2025-07-01 05:47:06.366 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:06.373 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:06.379
2025-07-01 05:47:06.386 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:06.399 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:06.407 alo = 208, ahi = 1101
2025-07-01 05:47:06.414 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:06.420 blo = 208, bhi = 1101
2025-07-01 05:47:06.425
2025-07-01 05:47:06.430 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:06.435 g = []
2025-07-01 05:47:06.441 if alo < ahi:
2025-07-01 05:47:06.446 if blo < bhi:
2025-07-01 05:47:06.459 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:06.469 else:
2025-07-01 05:47:06.480 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:06.490 elif blo < bhi:
2025-07-01 05:47:06.501 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:06.512
2025-07-01 05:47:06.521 > yield from g
2025-07-01 05:47:06.529
2025-07-01 05:47:06.535 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:06.543 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:06.550
2025-07-01 05:47:06.561 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:06.570 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:06.577 alo = 208, ahi = 1101
2025-07-01 05:47:06.589 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:06.602 blo = 208, bhi = 1101
2025-07-01 05:47:06.613
2025-07-01 05:47:06.625 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:06.637 r"""
2025-07-01 05:47:06.649 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:06.661 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:06.671 synch point, and intraline difference marking is done on the
2025-07-01 05:47:06.679 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:06.687
2025-07-01 05:47:06.693 Example:
2025-07-01 05:47:06.701
2025-07-01 05:47:06.711 >>> d = Differ()
2025-07-01 05:47:06.719 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:06.726 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:06.732 >>> print(''.join(results), end="")
2025-07-01 05:47:06.738 - abcDefghiJkl
2025-07-01 05:47:06.749 + abcdefGhijkl
2025-07-01 05:47:06.760 """
2025-07-01 05:47:06.766
2025-07-01 05:47:06.773 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:06.779 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:06.784 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:06.790 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:06.796 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:06.801
2025-07-01 05:47:06.814 # search for the pair that matches best without being identical
2025-07-01 05:47:06.822 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:06.830 # on junk -- unless we have to)
2025-07-01 05:47:06.836 for j in range(blo, bhi):
2025-07-01 05:47:06.845 bj = b[j]
2025-07-01 05:47:06.857 cruncher.set_seq2(bj)
2025-07-01 05:47:06.865 for i in range(alo, ahi):
2025-07-01 05:47:06.876 ai = a[i]
2025-07-01 05:47:06.885 if ai == bj:
2025-07-01 05:47:06.893 if eqi is None:
2025-07-01 05:47:06.900 eqi, eqj = i, j
2025-07-01 05:47:06.907 continue
2025-07-01 05:47:06.912 cruncher.set_seq1(ai)
2025-07-01 05:47:06.918 # computing similarity is expensive, so use the quick
2025-07-01 05:47:06.924 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:06.930 # compares by a factor of 3.
2025-07-01 05:47:06.939 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:06.949 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:06.956 # of the computation is cached by cruncher
2025-07-01 05:47:06.962 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:06.972 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:06.982 cruncher.ratio() > best_ratio:
2025-07-01 05:47:06.990 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:07.000 if best_ratio < cutoff:
2025-07-01 05:47:07.008 # no non-identical "pretty close" pair
2025-07-01 05:47:07.015 if eqi is None:
2025-07-01 05:47:07.024 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:07.035 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:07.044 return
2025-07-01 05:47:07.051 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:07.056 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:07.062 else:
2025-07-01 05:47:07.068 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:07.075 eqi = None
2025-07-01 05:47:07.080
2025-07-01 05:47:07.087 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:07.092 # identical
2025-07-01 05:47:07.098
2025-07-01 05:47:07.109 # pump out diffs from before the synch point
2025-07-01 05:47:07.118 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:07.129
2025-07-01 05:47:07.139 # do intraline marking on the synch pair
2025-07-01 05:47:07.151 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:07.161 if eqi is None:
2025-07-01 05:47:07.171 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:07.182 atags = btags = ""
2025-07-01 05:47:07.195 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:07.208 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:07.218 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:07.228 if tag == 'replace':
2025-07-01 05:47:07.235 atags += '^' * la
2025-07-01 05:47:07.241 btags += '^' * lb
2025-07-01 05:47:07.247 elif tag == 'delete':
2025-07-01 05:47:07.253 atags += '-' * la
2025-07-01 05:47:07.260 elif tag == 'insert':
2025-07-01 05:47:07.265 btags += '+' * lb
2025-07-01 05:47:07.274 elif tag == 'equal':
2025-07-01 05:47:07.284 atags += ' ' * la
2025-07-01 05:47:07.295 btags += ' ' * lb
2025-07-01 05:47:07.306 else:
2025-07-01 05:47:07.315 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:07.324 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:07.330 else:
2025-07-01 05:47:07.337 # the synch pair is identical
2025-07-01 05:47:07.342 yield ' ' + aelt
2025-07-01 05:47:07.352
2025-07-01 05:47:07.362 # pump out diffs from after the synch point
2025-07-01 05:47:07.369 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:07.375
2025-07-01 05:47:07.383 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:07.394 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:07.402
2025-07-01 05:47:07.409 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:07.420 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:07.429 alo = 209, ahi = 1101
2025-07-01 05:47:07.438 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:07.445 blo = 209, bhi = 1101
2025-07-01 05:47:07.452
2025-07-01 05:47:07.466 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:07.476 g = []
2025-07-01 05:47:07.486 if alo < ahi:
2025-07-01 05:47:07.498 if blo < bhi:
2025-07-01 05:47:07.510 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:07.518 else:
2025-07-01 05:47:07.528 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:07.543 elif blo < bhi:
2025-07-01 05:47:07.556 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:07.567
2025-07-01 05:47:07.578 > yield from g
2025-07-01 05:47:07.588
2025-07-01 05:47:07.600 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:07.614 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:07.622
2025-07-01 05:47:07.629 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:07.636 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:07.643 alo = 209, ahi = 1101
2025-07-01 05:47:07.651 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:07.659 blo = 209, bhi = 1101
2025-07-01 05:47:07.669
2025-07-01 05:47:07.678 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:07.684 r"""
2025-07-01 05:47:07.689 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:07.695 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:07.701 synch point, and intraline difference marking is done on the
2025-07-01 05:47:07.707 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:07.712
2025-07-01 05:47:07.719 Example:
2025-07-01 05:47:07.729
2025-07-01 05:47:07.739 >>> d = Differ()
2025-07-01 05:47:07.746 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:07.751 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:07.757 >>> print(''.join(results), end="")
2025-07-01 05:47:07.765 - abcDefghiJkl
2025-07-01 05:47:07.775 + abcdefGhijkl
2025-07-01 05:47:07.785 """
2025-07-01 05:47:07.790
2025-07-01 05:47:07.795 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:07.805 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:07.812 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:07.818 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:07.825 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:07.835
2025-07-01 05:47:07.842 # search for the pair that matches best without being identical
2025-07-01 05:47:07.848 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:07.854 # on junk -- unless we have to)
2025-07-01 05:47:07.859 for j in range(blo, bhi):
2025-07-01 05:47:07.864 bj = b[j]
2025-07-01 05:47:07.873 cruncher.set_seq2(bj)
2025-07-01 05:47:07.879 for i in range(alo, ahi):
2025-07-01 05:47:07.887 ai = a[i]
2025-07-01 05:47:07.894 if ai == bj:
2025-07-01 05:47:07.899 if eqi is None:
2025-07-01 05:47:07.905 eqi, eqj = i, j
2025-07-01 05:47:07.916 continue
2025-07-01 05:47:07.925 cruncher.set_seq1(ai)
2025-07-01 05:47:07.931 # computing similarity is expensive, so use the quick
2025-07-01 05:47:07.937 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:07.943 # compares by a factor of 3.
2025-07-01 05:47:07.950 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:07.961 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:07.971 # of the computation is cached by cruncher
2025-07-01 05:47:07.981 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:07.991 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:07.999 cruncher.ratio() > best_ratio:
2025-07-01 05:47:08.011 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:08.022 if best_ratio < cutoff:
2025-07-01 05:47:08.034 # no non-identical "pretty close" pair
2025-07-01 05:47:08.041 if eqi is None:
2025-07-01 05:47:08.048 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:08.055 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:08.063 return
2025-07-01 05:47:08.074 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:08.086 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:08.093 else:
2025-07-01 05:47:08.098 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:08.104 eqi = None
2025-07-01 05:47:08.115
2025-07-01 05:47:08.128 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:08.138 # identical
2025-07-01 05:47:08.150
2025-07-01 05:47:08.160 # pump out diffs from before the synch point
2025-07-01 05:47:08.173 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:08.184
2025-07-01 05:47:08.195 # do intraline marking on the synch pair
2025-07-01 05:47:08.205 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:08.217 if eqi is None:
2025-07-01 05:47:08.230 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:08.239 atags = btags = ""
2025-07-01 05:47:08.247 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:08.254 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:08.261 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:08.267 if tag == 'replace':
2025-07-01 05:47:08.275 atags += '^' * la
2025-07-01 05:47:08.281 btags += '^' * lb
2025-07-01 05:47:08.287 elif tag == 'delete':
2025-07-01 05:47:08.300 atags += '-' * la
2025-07-01 05:47:08.308 elif tag == 'insert':
2025-07-01 05:47:08.315 btags += '+' * lb
2025-07-01 05:47:08.326 elif tag == 'equal':
2025-07-01 05:47:08.339 atags += ' ' * la
2025-07-01 05:47:08.349 btags += ' ' * lb
2025-07-01 05:47:08.358 else:
2025-07-01 05:47:08.367 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:08.379 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:08.389 else:
2025-07-01 05:47:08.399 # the synch pair is identical
2025-07-01 05:47:08.406 yield ' ' + aelt
2025-07-01 05:47:08.415
2025-07-01 05:47:08.422 # pump out diffs from after the synch point
2025-07-01 05:47:08.432 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:08.444
2025-07-01 05:47:08.453 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:08.461 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:08.467
2025-07-01 05:47:08.474 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:08.485 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:08.495 alo = 210, ahi = 1101
2025-07-01 05:47:08.504 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:08.510 blo = 210, bhi = 1101
2025-07-01 05:47:08.516
2025-07-01 05:47:08.522 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:08.528 g = []
2025-07-01 05:47:08.539 if alo < ahi:
2025-07-01 05:47:08.547 if blo < bhi:
2025-07-01 05:47:08.556 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:08.563 else:
2025-07-01 05:47:08.570 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:08.577 elif blo < bhi:
2025-07-01 05:47:08.582 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:08.587
2025-07-01 05:47:08.591 > yield from g
2025-07-01 05:47:08.596
2025-07-01 05:47:08.601 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:08.606 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:08.613
2025-07-01 05:47:08.620 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:08.627 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:08.634 alo = 210, ahi = 1101
2025-07-01 05:47:08.644 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:08.653 blo = 210, bhi = 1101
2025-07-01 05:47:08.660
2025-07-01 05:47:08.667 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:08.674 r"""
2025-07-01 05:47:08.684 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:08.694 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:08.702 synch point, and intraline difference marking is done on the
2025-07-01 05:47:08.712 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:08.722
2025-07-01 05:47:08.731 Example:
2025-07-01 05:47:08.739
2025-07-01 05:47:08.746 >>> d = Differ()
2025-07-01 05:47:08.757 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:08.766 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:08.777 >>> print(''.join(results), end="")
2025-07-01 05:47:08.786 - abcDefghiJkl
2025-07-01 05:47:08.807 + abcdefGhijkl
2025-07-01 05:47:08.829 """
2025-07-01 05:47:08.836
2025-07-01 05:47:08.843 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:08.856 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:08.868 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:08.876 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:08.883 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:08.891
2025-07-01 05:47:08.902 # search for the pair that matches best without being identical
2025-07-01 05:47:08.913 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:08.920 # on junk -- unless we have to)
2025-07-01 05:47:08.932 for j in range(blo, bhi):
2025-07-01 05:47:08.941 bj = b[j]
2025-07-01 05:47:08.952 cruncher.set_seq2(bj)
2025-07-01 05:47:08.962 for i in range(alo, ahi):
2025-07-01 05:47:08.973 ai = a[i]
2025-07-01 05:47:08.984 if ai == bj:
2025-07-01 05:47:08.996 if eqi is None:
2025-07-01 05:47:09.005 eqi, eqj = i, j
2025-07-01 05:47:09.015 continue
2025-07-01 05:47:09.026 cruncher.set_seq1(ai)
2025-07-01 05:47:09.037 # computing similarity is expensive, so use the quick
2025-07-01 05:47:09.049 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:09.060 # compares by a factor of 3.
2025-07-01 05:47:09.071 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:09.080 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:09.088 # of the computation is cached by cruncher
2025-07-01 05:47:09.095 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:09.103 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:09.114 cruncher.ratio() > best_ratio:
2025-07-01 05:47:09.122 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:09.130 if best_ratio < cutoff:
2025-07-01 05:47:09.140 # no non-identical "pretty close" pair
2025-07-01 05:47:09.150 if eqi is None:
2025-07-01 05:47:09.160 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:09.168 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:09.174 return
2025-07-01 05:47:09.180 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:09.186 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:09.192 else:
2025-07-01 05:47:09.199 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:09.205 eqi = None
2025-07-01 05:47:09.210
2025-07-01 05:47:09.217 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:09.223 # identical
2025-07-01 05:47:09.233
2025-07-01 05:47:09.241 # pump out diffs from before the synch point
2025-07-01 05:47:09.248 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:09.255
2025-07-01 05:47:09.261 # do intraline marking on the synch pair
2025-07-01 05:47:09.267 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:09.275 if eqi is None:
2025-07-01 05:47:09.287 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:09.295 atags = btags = ""
2025-07-01 05:47:09.302 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:09.309 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:09.322 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:09.334 if tag == 'replace':
2025-07-01 05:47:09.343 atags += '^' * la
2025-07-01 05:47:09.349 btags += '^' * lb
2025-07-01 05:47:09.354 elif tag == 'delete':
2025-07-01 05:47:09.359 atags += '-' * la
2025-07-01 05:47:09.371 elif tag == 'insert':
2025-07-01 05:47:09.383 btags += '+' * lb
2025-07-01 05:47:09.395 elif tag == 'equal':
2025-07-01 05:47:09.405 atags += ' ' * la
2025-07-01 05:47:09.413 btags += ' ' * lb
2025-07-01 05:47:09.421 else:
2025-07-01 05:47:09.426 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:09.432 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:09.438 else:
2025-07-01 05:47:09.444 # the synch pair is identical
2025-07-01 05:47:09.451 yield ' ' + aelt
2025-07-01 05:47:09.458
2025-07-01 05:47:09.465 # pump out diffs from after the synch point
2025-07-01 05:47:09.472 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:09.479
2025-07-01 05:47:09.486 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:09.495 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:09.506
2025-07-01 05:47:09.514 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:09.521 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:09.527 alo = 211, ahi = 1101
2025-07-01 05:47:09.535 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:09.543 blo = 211, bhi = 1101
2025-07-01 05:47:09.554
2025-07-01 05:47:09.563 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:09.571 g = []
2025-07-01 05:47:09.577 if alo < ahi:
2025-07-01 05:47:09.583 if blo < bhi:
2025-07-01 05:47:09.590 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:09.599 else:
2025-07-01 05:47:09.610 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:09.617 elif blo < bhi:
2025-07-01 05:47:09.624 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:09.630
2025-07-01 05:47:09.637 > yield from g
2025-07-01 05:47:09.643
2025-07-01 05:47:09.650 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:09.661 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:09.669
2025-07-01 05:47:09.676 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:09.683 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:09.691 alo = 211, ahi = 1101
2025-07-01 05:47:09.702 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:09.711 blo = 211, bhi = 1101
2025-07-01 05:47:09.719
2025-07-01 05:47:09.728 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:09.740 r"""
2025-07-01 05:47:09.749 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:09.757 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:09.763 synch point, and intraline difference marking is done on the
2025-07-01 05:47:09.770 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:09.780
2025-07-01 05:47:09.792 Example:
2025-07-01 05:47:09.802
2025-07-01 05:47:09.812 >>> d = Differ()
2025-07-01 05:47:09.823 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:09.832 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:09.839 >>> print(''.join(results), end="")
2025-07-01 05:47:09.847 - abcDefghiJkl
2025-07-01 05:47:09.871 + abcdefGhijkl
2025-07-01 05:47:09.893 """
2025-07-01 05:47:09.900
2025-07-01 05:47:09.907 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:09.915 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:09.921 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:09.927 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:09.935 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:09.946
2025-07-01 05:47:09.954 # search for the pair that matches best without being identical
2025-07-01 05:47:09.962 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:09.969 # on junk -- unless we have to)
2025-07-01 05:47:09.975 for j in range(blo, bhi):
2025-07-01 05:47:09.981 bj = b[j]
2025-07-01 05:47:09.987 cruncher.set_seq2(bj)
2025-07-01 05:47:09.992 for i in range(alo, ahi):
2025-07-01 05:47:09.998 ai = a[i]
2025-07-01 05:47:10.011 if ai == bj:
2025-07-01 05:47:10.022 if eqi is None:
2025-07-01 05:47:10.030 eqi, eqj = i, j
2025-07-01 05:47:10.040 continue
2025-07-01 05:47:10.051 cruncher.set_seq1(ai)
2025-07-01 05:47:10.060 # computing similarity is expensive, so use the quick
2025-07-01 05:47:10.068 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:10.075 # compares by a factor of 3.
2025-07-01 05:47:10.083 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:10.095 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:10.103 # of the computation is cached by cruncher
2025-07-01 05:47:10.110 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:10.119 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:10.129 cruncher.ratio() > best_ratio:
2025-07-01 05:47:10.136 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:10.143 if best_ratio < cutoff:
2025-07-01 05:47:10.149 # no non-identical "pretty close" pair
2025-07-01 05:47:10.155 if eqi is None:
2025-07-01 05:47:10.161 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:10.167 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:10.175 return
2025-07-01 05:47:10.186 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:10.193 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:10.200 else:
2025-07-01 05:47:10.207 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:10.215 eqi = None
2025-07-01 05:47:10.226
2025-07-01 05:47:10.234 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:10.242 # identical
2025-07-01 05:47:10.253
2025-07-01 05:47:10.262 # pump out diffs from before the synch point
2025-07-01 05:47:10.275 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:10.285
2025-07-01 05:47:10.297 # do intraline marking on the synch pair
2025-07-01 05:47:10.308 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:10.320 if eqi is None:
2025-07-01 05:47:10.331 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:10.342 atags = btags = ""
2025-07-01 05:47:10.352 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:10.363 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:10.372 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:10.379 if tag == 'replace':
2025-07-01 05:47:10.389 atags += '^' * la
2025-07-01 05:47:10.396 btags += '^' * lb
2025-07-01 05:47:10.404 elif tag == 'delete':
2025-07-01 05:47:10.411 atags += '-' * la
2025-07-01 05:47:10.419 elif tag == 'insert':
2025-07-01 05:47:10.431 btags += '+' * lb
2025-07-01 05:47:10.441 elif tag == 'equal':
2025-07-01 05:47:10.451 atags += ' ' * la
2025-07-01 05:47:10.463 btags += ' ' * lb
2025-07-01 05:47:10.472 else:
2025-07-01 05:47:10.480 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:10.487 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:10.495 else:
2025-07-01 05:47:10.505 # the synch pair is identical
2025-07-01 05:47:10.513 yield ' ' + aelt
2025-07-01 05:47:10.520
2025-07-01 05:47:10.527 # pump out diffs from after the synch point
2025-07-01 05:47:10.533 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:10.539
2025-07-01 05:47:10.546 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:10.557 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:10.566
2025-07-01 05:47:10.575 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:10.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:10.589 alo = 212, ahi = 1101
2025-07-01 05:47:10.598 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:10.609 blo = 212, bhi = 1101
2025-07-01 05:47:10.617
2025-07-01 05:47:10.629 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:10.639 g = []
2025-07-01 05:47:10.646 if alo < ahi:
2025-07-01 05:47:10.659 if blo < bhi:
2025-07-01 05:47:10.669 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:10.677 else:
2025-07-01 05:47:10.687 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:10.697 elif blo < bhi:
2025-07-01 05:47:10.708 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:10.717
2025-07-01 05:47:10.726 > yield from g
2025-07-01 05:47:10.737
2025-07-01 05:47:10.748 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:10.761 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:10.770
2025-07-01 05:47:10.779 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:10.791 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:10.799 alo = 212, ahi = 1101
2025-07-01 05:47:10.807 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:10.815 blo = 212, bhi = 1101
2025-07-01 05:47:10.825
2025-07-01 05:47:10.832 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:10.840 r"""
2025-07-01 05:47:10.847 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:10.856 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:10.866 synch point, and intraline difference marking is done on the
2025-07-01 05:47:10.874 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:10.884
2025-07-01 05:47:10.892 Example:
2025-07-01 05:47:10.898
2025-07-01 05:47:10.904 >>> d = Differ()
2025-07-01 05:47:10.910 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:10.915 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:10.922 >>> print(''.join(results), end="")
2025-07-01 05:47:10.930 - abcDefghiJkl
2025-07-01 05:47:10.943 + abcdefGhijkl
2025-07-01 05:47:10.953 """
2025-07-01 05:47:10.958
2025-07-01 05:47:10.963 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:10.970 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:10.983 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:10.990 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:10.998 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:11.008
2025-07-01 05:47:11.017 # search for the pair that matches best without being identical
2025-07-01 05:47:11.025 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:11.032 # on junk -- unless we have to)
2025-07-01 05:47:11.038 for j in range(blo, bhi):
2025-07-01 05:47:11.042 bj = b[j]
2025-07-01 05:47:11.047 cruncher.set_seq2(bj)
2025-07-01 05:47:11.051 for i in range(alo, ahi):
2025-07-01 05:47:11.056 ai = a[i]
2025-07-01 05:47:11.061 if ai == bj:
2025-07-01 05:47:11.065 if eqi is None:
2025-07-01 05:47:11.077 eqi, eqj = i, j
2025-07-01 05:47:11.087 continue
2025-07-01 05:47:11.099 cruncher.set_seq1(ai)
2025-07-01 05:47:11.108 # computing similarity is expensive, so use the quick
2025-07-01 05:47:11.116 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:11.122 # compares by a factor of 3.
2025-07-01 05:47:11.129 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:11.134 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:11.143 # of the computation is cached by cruncher
2025-07-01 05:47:11.154 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:11.163 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:11.172 cruncher.ratio() > best_ratio:
2025-07-01 05:47:11.179 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:11.192 if best_ratio < cutoff:
2025-07-01 05:47:11.204 # no non-identical "pretty close" pair
2025-07-01 05:47:11.214 if eqi is None:
2025-07-01 05:47:11.226 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:11.236 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:11.244 return
2025-07-01 05:47:11.256 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:11.267 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:11.274 else:
2025-07-01 05:47:11.283 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:11.295 eqi = None
2025-07-01 05:47:11.304
2025-07-01 05:47:11.313 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:11.319 # identical
2025-07-01 05:47:11.325
2025-07-01 05:47:11.331 # pump out diffs from before the synch point
2025-07-01 05:47:11.337 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:11.343
2025-07-01 05:47:11.348 # do intraline marking on the synch pair
2025-07-01 05:47:11.354 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:11.359 if eqi is None:
2025-07-01 05:47:11.366 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:11.378 atags = btags = ""
2025-07-01 05:47:11.389 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:11.399 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:11.411 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:11.423 if tag == 'replace':
2025-07-01 05:47:11.434 atags += '^' * la
2025-07-01 05:47:11.442 btags += '^' * lb
2025-07-01 05:47:11.452 elif tag == 'delete':
2025-07-01 05:47:11.460 atags += '-' * la
2025-07-01 05:47:11.467 elif tag == 'insert':
2025-07-01 05:47:11.473 btags += '+' * lb
2025-07-01 05:47:11.479 elif tag == 'equal':
2025-07-01 05:47:11.484 atags += ' ' * la
2025-07-01 05:47:11.489 btags += ' ' * lb
2025-07-01 05:47:11.495 else:
2025-07-01 05:47:11.500 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:11.507 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:11.513 else:
2025-07-01 05:47:11.519 # the synch pair is identical
2025-07-01 05:47:11.525 yield ' ' + aelt
2025-07-01 05:47:11.530
2025-07-01 05:47:11.536 # pump out diffs from after the synch point
2025-07-01 05:47:11.542 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:11.550
2025-07-01 05:47:11.558 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:11.567 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:11.578
2025-07-01 05:47:11.586 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:11.595 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:11.603 alo = 213, ahi = 1101
2025-07-01 05:47:11.614 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:11.622 blo = 213, bhi = 1101
2025-07-01 05:47:11.628
2025-07-01 05:47:11.634 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:11.641 g = []
2025-07-01 05:47:11.647 if alo < ahi:
2025-07-01 05:47:11.653 if blo < bhi:
2025-07-01 05:47:11.658 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:11.663 else:
2025-07-01 05:47:11.669 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:11.674 elif blo < bhi:
2025-07-01 05:47:11.679 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:11.686
2025-07-01 05:47:11.691 > yield from g
2025-07-01 05:47:11.696
2025-07-01 05:47:11.702 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:11.715 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:11.722
2025-07-01 05:47:11.729 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:11.736 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:11.743 alo = 213, ahi = 1101
2025-07-01 05:47:11.751 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:11.757 blo = 213, bhi = 1101
2025-07-01 05:47:11.764
2025-07-01 05:47:11.771 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:11.777 r"""
2025-07-01 05:47:11.784 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:11.791 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:11.797 synch point, and intraline difference marking is done on the
2025-07-01 05:47:11.804 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:11.811
2025-07-01 05:47:11.823 Example:
2025-07-01 05:47:11.832
2025-07-01 05:47:11.841 >>> d = Differ()
2025-07-01 05:47:11.848 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:11.860 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:11.872 >>> print(''.join(results), end="")
2025-07-01 05:47:11.883 - abcDefghiJkl
2025-07-01 05:47:11.904 + abcdefGhijkl
2025-07-01 05:47:11.923 """
2025-07-01 05:47:11.931
2025-07-01 05:47:11.939 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:11.947 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:11.953 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:11.959 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:11.965 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:11.973
2025-07-01 05:47:11.979 # search for the pair that matches best without being identical
2025-07-01 05:47:11.985 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:11.990 # on junk -- unless we have to)
2025-07-01 05:47:11.994 for j in range(blo, bhi):
2025-07-01 05:47:11.999 bj = b[j]
2025-07-01 05:47:12.005 cruncher.set_seq2(bj)
2025-07-01 05:47:12.010 for i in range(alo, ahi):
2025-07-01 05:47:12.018 ai = a[i]
2025-07-01 05:47:12.028 if ai == bj:
2025-07-01 05:47:12.038 if eqi is None:
2025-07-01 05:47:12.045 eqi, eqj = i, j
2025-07-01 05:47:12.051 continue
2025-07-01 05:47:12.056 cruncher.set_seq1(ai)
2025-07-01 05:47:12.062 # computing similarity is expensive, so use the quick
2025-07-01 05:47:12.068 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:12.074 # compares by a factor of 3.
2025-07-01 05:47:12.080 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:12.086 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:12.093 # of the computation is cached by cruncher
2025-07-01 05:47:12.100 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:12.107 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:12.115 cruncher.ratio() > best_ratio:
2025-07-01 05:47:12.126 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:12.138 if best_ratio < cutoff:
2025-07-01 05:47:12.148 # no non-identical "pretty close" pair
2025-07-01 05:47:12.154 if eqi is None:
2025-07-01 05:47:12.161 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:12.168 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:12.174 return
2025-07-01 05:47:12.186 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:12.195 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:12.204 else:
2025-07-01 05:47:12.212 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:12.217 eqi = None
2025-07-01 05:47:12.223
2025-07-01 05:47:12.231 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:12.241 # identical
2025-07-01 05:47:12.249
2025-07-01 05:47:12.256 # pump out diffs from before the synch point
2025-07-01 05:47:12.262 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:12.271
2025-07-01 05:47:12.281 # do intraline marking on the synch pair
2025-07-01 05:47:12.289 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:12.296 if eqi is None:
2025-07-01 05:47:12.303 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:12.308 atags = btags = ""
2025-07-01 05:47:12.314 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:12.325 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:12.335 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:12.343 if tag == 'replace':
2025-07-01 05:47:12.351 atags += '^' * la
2025-07-01 05:47:12.356 btags += '^' * lb
2025-07-01 05:47:12.361 elif tag == 'delete':
2025-07-01 05:47:12.366 atags += '-' * la
2025-07-01 05:47:12.371 elif tag == 'insert':
2025-07-01 05:47:12.376 btags += '+' * lb
2025-07-01 05:47:12.381 elif tag == 'equal':
2025-07-01 05:47:12.387 atags += ' ' * la
2025-07-01 05:47:12.395 btags += ' ' * lb
2025-07-01 05:47:12.403 else:
2025-07-01 05:47:12.413 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:12.421 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:12.427 else:
2025-07-01 05:47:12.433 # the synch pair is identical
2025-07-01 05:47:12.440 yield ' ' + aelt
2025-07-01 05:47:12.447
2025-07-01 05:47:12.454 # pump out diffs from after the synch point
2025-07-01 05:47:12.463 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:12.474
2025-07-01 05:47:12.482 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:12.488 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:12.495
2025-07-01 05:47:12.502 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:12.511 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:12.523 alo = 214, ahi = 1101
2025-07-01 05:47:12.531 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:12.537 blo = 214, bhi = 1101
2025-07-01 05:47:12.543
2025-07-01 05:47:12.550 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:12.556 g = []
2025-07-01 05:47:12.563 if alo < ahi:
2025-07-01 05:47:12.570 if blo < bhi:
2025-07-01 05:47:12.581 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:12.587 else:
2025-07-01 05:47:12.594 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:12.601 elif blo < bhi:
2025-07-01 05:47:12.608 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:12.615
2025-07-01 05:47:12.622 > yield from g
2025-07-01 05:47:12.632
2025-07-01 05:47:12.643 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:12.649 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:12.655
2025-07-01 05:47:12.660 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:12.665 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:12.670 alo = 214, ahi = 1101
2025-07-01 05:47:12.676 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:12.682 blo = 214, bhi = 1101
2025-07-01 05:47:12.688
2025-07-01 05:47:12.695 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:12.704 r"""
2025-07-01 05:47:12.714 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:12.722 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:12.730 synch point, and intraline difference marking is done on the
2025-07-01 05:47:12.737 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:12.744
2025-07-01 05:47:12.757 Example:
2025-07-01 05:47:12.768
2025-07-01 05:47:12.776 >>> d = Differ()
2025-07-01 05:47:12.781 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:12.786 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:12.791 >>> print(''.join(results), end="")
2025-07-01 05:47:12.797 - abcDefghiJkl
2025-07-01 05:47:12.812 + abcdefGhijkl
2025-07-01 05:47:12.828 """
2025-07-01 05:47:12.835
2025-07-01 05:47:12.842 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:12.852 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:12.865 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:12.874 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:12.882 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:12.893
2025-07-01 05:47:12.906 # search for the pair that matches best without being identical
2025-07-01 05:47:12.918 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:12.928 # on junk -- unless we have to)
2025-07-01 05:47:12.940 for j in range(blo, bhi):
2025-07-01 05:47:12.953 bj = b[j]
2025-07-01 05:47:12.964 cruncher.set_seq2(bj)
2025-07-01 05:47:12.972 for i in range(alo, ahi):
2025-07-01 05:47:12.978 ai = a[i]
2025-07-01 05:47:12.990 if ai == bj:
2025-07-01 05:47:12.998 if eqi is None:
2025-07-01 05:47:13.006 eqi, eqj = i, j
2025-07-01 05:47:13.011 continue
2025-07-01 05:47:13.016 cruncher.set_seq1(ai)
2025-07-01 05:47:13.022 # computing similarity is expensive, so use the quick
2025-07-01 05:47:13.033 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:13.041 # compares by a factor of 3.
2025-07-01 05:47:13.046 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:13.053 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:13.060 # of the computation is cached by cruncher
2025-07-01 05:47:13.068 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:13.074 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:13.079 cruncher.ratio() > best_ratio:
2025-07-01 05:47:13.088 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:13.099 if best_ratio < cutoff:
2025-07-01 05:47:13.107 # no non-identical "pretty close" pair
2025-07-01 05:47:13.115 if eqi is None:
2025-07-01 05:47:13.123 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:13.131 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:13.143 return
2025-07-01 05:47:13.151 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:13.159 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:13.171 else:
2025-07-01 05:47:13.181 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:13.189 eqi = None
2025-07-01 05:47:13.201
2025-07-01 05:47:13.210 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:13.216 # identical
2025-07-01 05:47:13.222
2025-07-01 05:47:13.228 # pump out diffs from before the synch point
2025-07-01 05:47:13.238 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:13.245
2025-07-01 05:47:13.257 # do intraline marking on the synch pair
2025-07-01 05:47:13.268 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:13.277 if eqi is None:
2025-07-01 05:47:13.285 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:13.291 atags = btags = ""
2025-07-01 05:47:13.297 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:13.304 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:13.310 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:13.316 if tag == 'replace':
2025-07-01 05:47:13.323 atags += '^' * la
2025-07-01 05:47:13.330 btags += '^' * lb
2025-07-01 05:47:13.341 elif tag == 'delete':
2025-07-01 05:47:13.350 atags += '-' * la
2025-07-01 05:47:13.359 elif tag == 'insert':
2025-07-01 05:47:13.366 btags += '+' * lb
2025-07-01 05:47:13.374 elif tag == 'equal':
2025-07-01 05:47:13.380 atags += ' ' * la
2025-07-01 05:47:13.387 btags += ' ' * lb
2025-07-01 05:47:13.396 else:
2025-07-01 05:47:13.404 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:13.411 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:13.418 else:
2025-07-01 05:47:13.425 # the synch pair is identical
2025-07-01 05:47:13.432 yield ' ' + aelt
2025-07-01 05:47:13.439
2025-07-01 05:47:13.447 # pump out diffs from after the synch point
2025-07-01 05:47:13.460 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:13.471
2025-07-01 05:47:13.481 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:13.494 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:13.503
2025-07-01 05:47:13.512 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:13.520 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:13.526 alo = 215, ahi = 1101
2025-07-01 05:47:13.532 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:13.537 blo = 215, bhi = 1101
2025-07-01 05:47:13.546
2025-07-01 05:47:13.558 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:13.566 g = []
2025-07-01 05:47:13.572 if alo < ahi:
2025-07-01 05:47:13.581 if blo < bhi:
2025-07-01 05:47:13.587 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:13.595 else:
2025-07-01 05:47:13.606 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:13.615 elif blo < bhi:
2025-07-01 05:47:13.622 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:13.628
2025-07-01 05:47:13.635 > yield from g
2025-07-01 05:47:13.643
2025-07-01 05:47:13.653 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:13.663 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:13.670
2025-07-01 05:47:13.680 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:13.688 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:13.695 alo = 215, ahi = 1101
2025-07-01 05:47:13.704 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:13.710 blo = 215, bhi = 1101
2025-07-01 05:47:13.715
2025-07-01 05:47:13.723 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:13.733 r"""
2025-07-01 05:47:13.741 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:13.748 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:13.756 synch point, and intraline difference marking is done on the
2025-07-01 05:47:13.764 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:13.770
2025-07-01 05:47:13.776 Example:
2025-07-01 05:47:13.781
2025-07-01 05:47:13.787 >>> d = Differ()
2025-07-01 05:47:13.793 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:13.799 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:13.805 >>> print(''.join(results), end="")
2025-07-01 05:47:13.810 - abcDefghiJkl
2025-07-01 05:47:13.831 + abcdefGhijkl
2025-07-01 05:47:13.851 """
2025-07-01 05:47:13.859
2025-07-01 05:47:13.868 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:13.878 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:13.887 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:13.895 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:13.906 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:13.915
2025-07-01 05:47:13.925 # search for the pair that matches best without being identical
2025-07-01 05:47:13.936 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:13.947 # on junk -- unless we have to)
2025-07-01 05:47:13.955 for j in range(blo, bhi):
2025-07-01 05:47:13.962 bj = b[j]
2025-07-01 05:47:13.972 cruncher.set_seq2(bj)
2025-07-01 05:47:13.983 for i in range(alo, ahi):
2025-07-01 05:47:13.992 ai = a[i]
2025-07-01 05:47:14.004 if ai == bj:
2025-07-01 05:47:14.013 if eqi is None:
2025-07-01 05:47:14.021 eqi, eqj = i, j
2025-07-01 05:47:14.027 continue
2025-07-01 05:47:14.035 cruncher.set_seq1(ai)
2025-07-01 05:47:14.048 # computing similarity is expensive, so use the quick
2025-07-01 05:47:14.060 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:14.071 # compares by a factor of 3.
2025-07-01 05:47:14.084 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:14.095 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:14.103 # of the computation is cached by cruncher
2025-07-01 05:47:14.111 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:14.117 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:14.124 cruncher.ratio() > best_ratio:
2025-07-01 05:47:14.130 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:14.138 if best_ratio < cutoff:
2025-07-01 05:47:14.145 # no non-identical "pretty close" pair
2025-07-01 05:47:14.151 if eqi is None:
2025-07-01 05:47:14.161 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:14.170 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:14.178 return
2025-07-01 05:47:14.189 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:14.199 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:14.206 else:
2025-07-01 05:47:14.212 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:14.218 eqi = None
2025-07-01 05:47:14.224
2025-07-01 05:47:14.231 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:14.239 # identical
2025-07-01 05:47:14.247
2025-07-01 05:47:14.258 # pump out diffs from before the synch point
2025-07-01 05:47:14.270 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:14.280
2025-07-01 05:47:14.288 # do intraline marking on the synch pair
2025-07-01 05:47:14.295 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:14.301 if eqi is None:
2025-07-01 05:47:14.307 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:14.313 atags = btags = ""
2025-07-01 05:47:14.318 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:14.325 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:14.331 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:14.337 if tag == 'replace':
2025-07-01 05:47:14.352 atags += '^' * la
2025-07-01 05:47:14.364 btags += '^' * lb
2025-07-01 05:47:14.371 elif tag == 'delete':
2025-07-01 05:47:14.378 atags += '-' * la
2025-07-01 05:47:14.385 elif tag == 'insert':
2025-07-01 05:47:14.392 btags += '+' * lb
2025-07-01 05:47:14.398 elif tag == 'equal':
2025-07-01 05:47:14.406 atags += ' ' * la
2025-07-01 05:47:14.413 btags += ' ' * lb
2025-07-01 05:47:14.420 else:
2025-07-01 05:47:14.427 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:14.437 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:14.447 else:
2025-07-01 05:47:14.456 # the synch pair is identical
2025-07-01 05:47:14.463 yield ' ' + aelt
2025-07-01 05:47:14.469
2025-07-01 05:47:14.474 # pump out diffs from after the synch point
2025-07-01 05:47:14.480 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:14.487
2025-07-01 05:47:14.494 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:14.500 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:14.506
2025-07-01 05:47:14.518 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:14.529 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:14.537 alo = 216, ahi = 1101
2025-07-01 05:47:14.544 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:14.551 blo = 216, bhi = 1101
2025-07-01 05:47:14.561
2025-07-01 05:47:14.572 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:14.581 g = []
2025-07-01 05:47:14.587 if alo < ahi:
2025-07-01 05:47:14.592 if blo < bhi:
2025-07-01 05:47:14.597 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:14.602 else:
2025-07-01 05:47:14.608 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:14.615 elif blo < bhi:
2025-07-01 05:47:14.622 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:14.632
2025-07-01 05:47:14.642 > yield from g
2025-07-01 05:47:14.649
2025-07-01 05:47:14.655 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:14.661 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:14.665
2025-07-01 05:47:14.670 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:14.676 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:14.680 alo = 216, ahi = 1101
2025-07-01 05:47:14.686 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:14.690 blo = 216, bhi = 1101
2025-07-01 05:47:14.695
2025-07-01 05:47:14.700 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:14.706 r"""
2025-07-01 05:47:14.713 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:14.718 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:14.726 synch point, and intraline difference marking is done on the
2025-07-01 05:47:14.734 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:14.739
2025-07-01 05:47:14.745 Example:
2025-07-01 05:47:14.749
2025-07-01 05:47:14.755 >>> d = Differ()
2025-07-01 05:47:14.760 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:14.766 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:14.772 >>> print(''.join(results), end="")
2025-07-01 05:47:14.780 - abcDefghiJkl
2025-07-01 05:47:14.792 + abcdefGhijkl
2025-07-01 05:47:14.804 """
2025-07-01 05:47:14.810
2025-07-01 05:47:14.820 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:14.829 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:14.837 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:14.842 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:14.848 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:14.853
2025-07-01 05:47:14.859 # search for the pair that matches best without being identical
2025-07-01 05:47:14.866 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:14.871 # on junk -- unless we have to)
2025-07-01 05:47:14.877 for j in range(blo, bhi):
2025-07-01 05:47:14.882 bj = b[j]
2025-07-01 05:47:14.888 cruncher.set_seq2(bj)
2025-07-01 05:47:14.896 for i in range(alo, ahi):
2025-07-01 05:47:14.901 ai = a[i]
2025-07-01 05:47:14.907 if ai == bj:
2025-07-01 05:47:14.912 if eqi is None:
2025-07-01 05:47:14.918 eqi, eqj = i, j
2025-07-01 05:47:14.924 continue
2025-07-01 05:47:14.931 cruncher.set_seq1(ai)
2025-07-01 05:47:14.941 # computing similarity is expensive, so use the quick
2025-07-01 05:47:14.948 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:14.955 # compares by a factor of 3.
2025-07-01 05:47:14.960 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:14.965 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:14.970 # of the computation is cached by cruncher
2025-07-01 05:47:14.974 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:14.979 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:14.983 cruncher.ratio() > best_ratio:
2025-07-01 05:47:14.988 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:14.992 if best_ratio < cutoff:
2025-07-01 05:47:14.997 # no non-identical "pretty close" pair
2025-07-01 05:47:15.003 if eqi is None:
2025-07-01 05:47:15.009 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:15.016 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:15.023 return
2025-07-01 05:47:15.030 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:15.036 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:15.042 else:
2025-07-01 05:47:15.049 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:15.056 eqi = None
2025-07-01 05:47:15.061
2025-07-01 05:47:15.067 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:15.073 # identical
2025-07-01 05:47:15.080
2025-07-01 05:47:15.086 # pump out diffs from before the synch point
2025-07-01 05:47:15.093 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:15.099
2025-07-01 05:47:15.105 # do intraline marking on the synch pair
2025-07-01 05:47:15.111 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:15.117 if eqi is None:
2025-07-01 05:47:15.123 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:15.128 atags = btags = ""
2025-07-01 05:47:15.133 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:15.138 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:15.143 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:15.148 if tag == 'replace':
2025-07-01 05:47:15.153 atags += '^' * la
2025-07-01 05:47:15.159 btags += '^' * lb
2025-07-01 05:47:15.165 elif tag == 'delete':
2025-07-01 05:47:15.171 atags += '-' * la
2025-07-01 05:47:15.177 elif tag == 'insert':
2025-07-01 05:47:15.183 btags += '+' * lb
2025-07-01 05:47:15.190 elif tag == 'equal':
2025-07-01 05:47:15.201 atags += ' ' * la
2025-07-01 05:47:15.211 btags += ' ' * lb
2025-07-01 05:47:15.219 else:
2025-07-01 05:47:15.225 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:15.230 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:15.234 else:
2025-07-01 05:47:15.239 # the synch pair is identical
2025-07-01 05:47:15.243 yield ' ' + aelt
2025-07-01 05:47:15.249
2025-07-01 05:47:15.258 # pump out diffs from after the synch point
2025-07-01 05:47:15.268 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:15.278
2025-07-01 05:47:15.290 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:15.299 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:15.307
2025-07-01 05:47:15.313 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:15.321 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:15.327 alo = 217, ahi = 1101
2025-07-01 05:47:15.334 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:15.340 blo = 217, bhi = 1101
2025-07-01 05:47:15.347
2025-07-01 05:47:15.357 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:15.365 g = []
2025-07-01 05:47:15.372 if alo < ahi:
2025-07-01 05:47:15.379 if blo < bhi:
2025-07-01 05:47:15.385 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:15.391 else:
2025-07-01 05:47:15.397 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:15.408 elif blo < bhi:
2025-07-01 05:47:15.420 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:15.429
2025-07-01 05:47:15.438 > yield from g
2025-07-01 05:47:15.450
2025-07-01 05:47:15.462 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:15.472 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:15.481
2025-07-01 05:47:15.488 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:15.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:15.516 alo = 217, ahi = 1101
2025-07-01 05:47:15.529 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:15.538 blo = 217, bhi = 1101
2025-07-01 05:47:15.545
2025-07-01 05:47:15.551 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:15.560 r"""
2025-07-01 05:47:15.573 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:15.582 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:15.593 synch point, and intraline difference marking is done on the
2025-07-01 05:47:15.605 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:15.615
2025-07-01 05:47:15.626 Example:
2025-07-01 05:47:15.638
2025-07-01 05:47:15.648 >>> d = Differ()
2025-07-01 05:47:15.657 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:15.667 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:15.680 >>> print(''.join(results), end="")
2025-07-01 05:47:15.692 - abcDefghiJkl
2025-07-01 05:47:15.718 + abcdefGhijkl
2025-07-01 05:47:15.738 """
2025-07-01 05:47:15.748
2025-07-01 05:47:15.759 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:15.768 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:15.779 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:15.793 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:15.806 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:15.816
2025-07-01 05:47:15.825 # search for the pair that matches best without being identical
2025-07-01 05:47:15.837 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:15.848 # on junk -- unless we have to)
2025-07-01 05:47:15.861 for j in range(blo, bhi):
2025-07-01 05:47:15.872 bj = b[j]
2025-07-01 05:47:15.884 cruncher.set_seq2(bj)
2025-07-01 05:47:15.893 for i in range(alo, ahi):
2025-07-01 05:47:15.902 ai = a[i]
2025-07-01 05:47:15.914 if ai == bj:
2025-07-01 05:47:15.925 if eqi is None:
2025-07-01 05:47:15.933 eqi, eqj = i, j
2025-07-01 05:47:15.940 continue
2025-07-01 05:47:15.948 cruncher.set_seq1(ai)
2025-07-01 05:47:15.955 # computing similarity is expensive, so use the quick
2025-07-01 05:47:15.963 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:15.970 # compares by a factor of 3.
2025-07-01 05:47:15.977 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:15.983 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:15.991 # of the computation is cached by cruncher
2025-07-01 05:47:16.002 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:16.012 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:16.019 cruncher.ratio() > best_ratio:
2025-07-01 05:47:16.027 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:16.034 if best_ratio < cutoff:
2025-07-01 05:47:16.049 # no non-identical "pretty close" pair
2025-07-01 05:47:16.059 if eqi is None:
2025-07-01 05:47:16.067 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:16.074 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:16.083 return
2025-07-01 05:47:16.097 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:16.107 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:16.115 else:
2025-07-01 05:47:16.123 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:16.134 eqi = None
2025-07-01 05:47:16.146
2025-07-01 05:47:16.158 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:16.167 # identical
2025-07-01 05:47:16.175
2025-07-01 05:47:16.188 # pump out diffs from before the synch point
2025-07-01 05:47:16.198 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:16.209
2025-07-01 05:47:16.219 # do intraline marking on the synch pair
2025-07-01 05:47:16.230 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:16.242 if eqi is None:
2025-07-01 05:47:16.252 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:16.262 atags = btags = ""
2025-07-01 05:47:16.272 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:16.283 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:16.294 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:16.303 if tag == 'replace':
2025-07-01 05:47:16.311 atags += '^' * la
2025-07-01 05:47:16.318 btags += '^' * lb
2025-07-01 05:47:16.331 elif tag == 'delete':
2025-07-01 05:47:16.344 atags += '-' * la
2025-07-01 05:47:16.354 elif tag == 'insert':
2025-07-01 05:47:16.367 btags += '+' * lb
2025-07-01 05:47:16.376 elif tag == 'equal':
2025-07-01 05:47:16.383 atags += ' ' * la
2025-07-01 05:47:16.389 btags += ' ' * lb
2025-07-01 05:47:16.395 else:
2025-07-01 05:47:16.399 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:16.404 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:16.409 else:
2025-07-01 05:47:16.414 # the synch pair is identical
2025-07-01 05:47:16.419 yield ' ' + aelt
2025-07-01 05:47:16.423
2025-07-01 05:47:16.429 # pump out diffs from after the synch point
2025-07-01 05:47:16.436 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:16.441
2025-07-01 05:47:16.447 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:16.453 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:16.459
2025-07-01 05:47:16.464 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:16.471 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:16.477 alo = 218, ahi = 1101
2025-07-01 05:47:16.484 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:16.490 blo = 218, bhi = 1101
2025-07-01 05:47:16.501
2025-07-01 05:47:16.510 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:16.516 g = []
2025-07-01 05:47:16.523 if alo < ahi:
2025-07-01 05:47:16.527 if blo < bhi:
2025-07-01 05:47:16.532 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:16.537 else:
2025-07-01 05:47:16.542 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:16.550 elif blo < bhi:
2025-07-01 05:47:16.557 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:16.563
2025-07-01 05:47:16.570 > yield from g
2025-07-01 05:47:16.576
2025-07-01 05:47:16.583 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:16.590 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:16.599
2025-07-01 05:47:16.612 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:16.623 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:16.631 alo = 218, ahi = 1101
2025-07-01 05:47:16.638 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:16.644 blo = 218, bhi = 1101
2025-07-01 05:47:16.649
2025-07-01 05:47:16.656 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:16.663 r"""
2025-07-01 05:47:16.670 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:16.677 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:16.683 synch point, and intraline difference marking is done on the
2025-07-01 05:47:16.688 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:16.694
2025-07-01 05:47:16.703 Example:
2025-07-01 05:47:16.712
2025-07-01 05:47:16.720 >>> d = Differ()
2025-07-01 05:47:16.727 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:16.732 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:16.738 >>> print(''.join(results), end="")
2025-07-01 05:47:16.744 - abcDefghiJkl
2025-07-01 05:47:16.756 + abcdefGhijkl
2025-07-01 05:47:16.769 """
2025-07-01 05:47:16.775
2025-07-01 05:47:16.780 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:16.786 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:16.792 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:16.798 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:16.804 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:16.810
2025-07-01 05:47:16.816 # search for the pair that matches best without being identical
2025-07-01 05:47:16.822 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:16.828 # on junk -- unless we have to)
2025-07-01 05:47:16.836 for j in range(blo, bhi):
2025-07-01 05:47:16.847 bj = b[j]
2025-07-01 05:47:16.857 cruncher.set_seq2(bj)
2025-07-01 05:47:16.864 for i in range(alo, ahi):
2025-07-01 05:47:16.875 ai = a[i]
2025-07-01 05:47:16.886 if ai == bj:
2025-07-01 05:47:16.898 if eqi is None:
2025-07-01 05:47:16.907 eqi, eqj = i, j
2025-07-01 05:47:16.914 continue
2025-07-01 05:47:16.921 cruncher.set_seq1(ai)
2025-07-01 05:47:16.934 # computing similarity is expensive, so use the quick
2025-07-01 05:47:16.942 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:16.948 # compares by a factor of 3.
2025-07-01 05:47:16.953 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:16.958 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:16.965 # of the computation is cached by cruncher
2025-07-01 05:47:16.975 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:16.984 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:16.990 cruncher.ratio() > best_ratio:
2025-07-01 05:47:16.996 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:17.002 if best_ratio < cutoff:
2025-07-01 05:47:17.008 # no non-identical "pretty close" pair
2025-07-01 05:47:17.015 if eqi is None:
2025-07-01 05:47:17.021 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:17.028 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:17.035 return
2025-07-01 05:47:17.042 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:17.053 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:17.062 else:
2025-07-01 05:47:17.071 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:17.078 eqi = None
2025-07-01 05:47:17.092
2025-07-01 05:47:17.102 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:17.112 # identical
2025-07-01 05:47:17.120
2025-07-01 05:47:17.126 # pump out diffs from before the synch point
2025-07-01 05:47:17.138 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:17.146
2025-07-01 05:47:17.155 # do intraline marking on the synch pair
2025-07-01 05:47:17.168 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:17.179 if eqi is None:
2025-07-01 05:47:17.190 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:17.200 atags = btags = ""
2025-07-01 05:47:17.211 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:17.224 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:17.231 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:17.238 if tag == 'replace':
2025-07-01 05:47:17.244 atags += '^' * la
2025-07-01 05:47:17.250 btags += '^' * lb
2025-07-01 05:47:17.255 elif tag == 'delete':
2025-07-01 05:47:17.261 atags += '-' * la
2025-07-01 05:47:17.270 elif tag == 'insert':
2025-07-01 05:47:17.278 btags += '+' * lb
2025-07-01 05:47:17.285 elif tag == 'equal':
2025-07-01 05:47:17.291 atags += ' ' * la
2025-07-01 05:47:17.299 btags += ' ' * lb
2025-07-01 05:47:17.310 else:
2025-07-01 05:47:17.320 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:17.329 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:17.337 else:
2025-07-01 05:47:17.349 # the synch pair is identical
2025-07-01 05:47:17.360 yield ' ' + aelt
2025-07-01 05:47:17.368
2025-07-01 05:47:17.380 # pump out diffs from after the synch point
2025-07-01 05:47:17.389 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:17.400
2025-07-01 05:47:17.412 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:17.420 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:17.427
2025-07-01 05:47:17.437 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:17.450 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:17.460 alo = 219, ahi = 1101
2025-07-01 05:47:17.467 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:17.474 blo = 219, bhi = 1101
2025-07-01 05:47:17.480
2025-07-01 05:47:17.486 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:17.498 g = []
2025-07-01 05:47:17.508 if alo < ahi:
2025-07-01 05:47:17.515 if blo < bhi:
2025-07-01 05:47:17.520 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:17.526 else:
2025-07-01 05:47:17.532 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:17.537 elif blo < bhi:
2025-07-01 05:47:17.549 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:17.557
2025-07-01 05:47:17.563 > yield from g
2025-07-01 05:47:17.568
2025-07-01 05:47:17.573 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:17.578 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:17.582
2025-07-01 05:47:17.588 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:17.594 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:17.600 alo = 219, ahi = 1101
2025-07-01 05:47:17.606 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:17.613 blo = 219, bhi = 1101
2025-07-01 05:47:17.619
2025-07-01 05:47:17.626 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:17.633 r"""
2025-07-01 05:47:17.639 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:17.645 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:17.651 synch point, and intraline difference marking is done on the
2025-07-01 05:47:17.657 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:17.663
2025-07-01 05:47:17.669 Example:
2025-07-01 05:47:17.674
2025-07-01 05:47:17.679 >>> d = Differ()
2025-07-01 05:47:17.685 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:17.691 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:17.696 >>> print(''.join(results), end="")
2025-07-01 05:47:17.703 - abcDefghiJkl
2025-07-01 05:47:17.722 + abcdefGhijkl
2025-07-01 05:47:17.740 """
2025-07-01 05:47:17.745
2025-07-01 05:47:17.750 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:17.755 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:17.760 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:17.764 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:17.771 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:17.777
2025-07-01 05:47:17.784 # search for the pair that matches best without being identical
2025-07-01 05:47:17.791 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:17.798 # on junk -- unless we have to)
2025-07-01 05:47:17.807 for j in range(blo, bhi):
2025-07-01 05:47:17.817 bj = b[j]
2025-07-01 05:47:17.826 cruncher.set_seq2(bj)
2025-07-01 05:47:17.832 for i in range(alo, ahi):
2025-07-01 05:47:17.838 ai = a[i]
2025-07-01 05:47:17.842 if ai == bj:
2025-07-01 05:47:17.847 if eqi is None:
2025-07-01 05:47:17.852 eqi, eqj = i, j
2025-07-01 05:47:17.857 continue
2025-07-01 05:47:17.863 cruncher.set_seq1(ai)
2025-07-01 05:47:17.869 # computing similarity is expensive, so use the quick
2025-07-01 05:47:17.875 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:17.881 # compares by a factor of 3.
2025-07-01 05:47:17.886 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:17.892 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:17.896 # of the computation is cached by cruncher
2025-07-01 05:47:17.901 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:17.906 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:17.911 cruncher.ratio() > best_ratio:
2025-07-01 05:47:17.916 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:17.921 if best_ratio < cutoff:
2025-07-01 05:47:17.927 # no non-identical "pretty close" pair
2025-07-01 05:47:17.933 if eqi is None:
2025-07-01 05:47:17.939 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:17.945 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:17.951 return
2025-07-01 05:47:17.958 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:17.966 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:17.977 else:
2025-07-01 05:47:17.985 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:17.992 eqi = None
2025-07-01 05:47:17.998
2025-07-01 05:47:18.004 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:18.011 # identical
2025-07-01 05:47:18.017
2025-07-01 05:47:18.024 # pump out diffs from before the synch point
2025-07-01 05:47:18.030 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:18.040
2025-07-01 05:47:18.049 # do intraline marking on the synch pair
2025-07-01 05:47:18.057 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:18.063 if eqi is None:
2025-07-01 05:47:18.068 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:18.074 atags = btags = ""
2025-07-01 05:47:18.080 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:18.086 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:18.093 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:18.105 if tag == 'replace':
2025-07-01 05:47:18.114 atags += '^' * la
2025-07-01 05:47:18.121 btags += '^' * lb
2025-07-01 05:47:18.128 elif tag == 'delete':
2025-07-01 05:47:18.135 atags += '-' * la
2025-07-01 05:47:18.144 elif tag == 'insert':
2025-07-01 05:47:18.154 btags += '+' * lb
2025-07-01 05:47:18.164 elif tag == 'equal':
2025-07-01 05:47:18.174 atags += ' ' * la
2025-07-01 05:47:18.182 btags += ' ' * lb
2025-07-01 05:47:18.188 else:
2025-07-01 05:47:18.194 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:18.201 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:18.207 else:
2025-07-01 05:47:18.214 # the synch pair is identical
2025-07-01 05:47:18.221 yield ' ' + aelt
2025-07-01 05:47:18.228
2025-07-01 05:47:18.234 # pump out diffs from after the synch point
2025-07-01 05:47:18.241 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:18.248
2025-07-01 05:47:18.255 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:18.262 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:18.272
2025-07-01 05:47:18.284 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:18.294 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:18.301 alo = 220, ahi = 1101
2025-07-01 05:47:18.308 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:18.313 blo = 220, bhi = 1101
2025-07-01 05:47:18.319
2025-07-01 05:47:18.325 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:18.330 g = []
2025-07-01 05:47:18.336 if alo < ahi:
2025-07-01 05:47:18.341 if blo < bhi:
2025-07-01 05:47:18.353 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:18.364 else:
2025-07-01 05:47:18.371 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:18.378 elif blo < bhi:
2025-07-01 05:47:18.383 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:18.388
2025-07-01 05:47:18.393 > yield from g
2025-07-01 05:47:18.398
2025-07-01 05:47:18.403 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:18.409 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:18.415
2025-07-01 05:47:18.422 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:18.430 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:18.437 alo = 220, ahi = 1101
2025-07-01 05:47:18.445 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:18.452 blo = 220, bhi = 1101
2025-07-01 05:47:18.458
2025-07-01 05:47:18.465 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:18.472 r"""
2025-07-01 05:47:18.479 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:18.487 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:18.498 synch point, and intraline difference marking is done on the
2025-07-01 05:47:18.507 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:18.513
2025-07-01 05:47:18.519 Example:
2025-07-01 05:47:18.525
2025-07-01 05:47:18.531 >>> d = Differ()
2025-07-01 05:47:18.537 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:18.544 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:18.551 >>> print(''.join(results), end="")
2025-07-01 05:47:18.557 - abcDefghiJkl
2025-07-01 05:47:18.572 + abcdefGhijkl
2025-07-01 05:47:18.588 """
2025-07-01 05:47:18.596
2025-07-01 05:47:18.604 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:18.613 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:18.622 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:18.636 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:18.647 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:18.657
2025-07-01 05:47:18.664 # search for the pair that matches best without being identical
2025-07-01 05:47:18.671 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:18.684 # on junk -- unless we have to)
2025-07-01 05:47:18.695 for j in range(blo, bhi):
2025-07-01 05:47:18.701 bj = b[j]
2025-07-01 05:47:18.706 cruncher.set_seq2(bj)
2025-07-01 05:47:18.711 for i in range(alo, ahi):
2025-07-01 05:47:18.717 ai = a[i]
2025-07-01 05:47:18.722 if ai == bj:
2025-07-01 05:47:18.732 if eqi is None:
2025-07-01 05:47:18.742 eqi, eqj = i, j
2025-07-01 05:47:18.749 continue
2025-07-01 05:47:18.756 cruncher.set_seq1(ai)
2025-07-01 05:47:18.763 # computing similarity is expensive, so use the quick
2025-07-01 05:47:18.769 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:18.774 # compares by a factor of 3.
2025-07-01 05:47:18.780 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:18.786 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:18.796 # of the computation is cached by cruncher
2025-07-01 05:47:18.808 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:18.818 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:18.826 cruncher.ratio() > best_ratio:
2025-07-01 05:47:18.836 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:18.846 if best_ratio < cutoff:
2025-07-01 05:47:18.855 # no non-identical "pretty close" pair
2025-07-01 05:47:18.867 if eqi is None:
2025-07-01 05:47:18.878 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:18.891 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:18.899 return
2025-07-01 05:47:18.907 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:18.913 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:18.920 else:
2025-07-01 05:47:18.926 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:18.935 eqi = None
2025-07-01 05:47:18.943
2025-07-01 05:47:18.953 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:18.966 # identical
2025-07-01 05:47:18.977
2025-07-01 05:47:18.985 # pump out diffs from before the synch point
2025-07-01 05:47:18.992 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:18.998
2025-07-01 05:47:19.009 # do intraline marking on the synch pair
2025-07-01 05:47:19.020 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:19.032 if eqi is None:
2025-07-01 05:47:19.044 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:19.053 atags = btags = ""
2025-07-01 05:47:19.061 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:19.068 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:19.076 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:19.083 if tag == 'replace':
2025-07-01 05:47:19.095 atags += '^' * la
2025-07-01 05:47:19.104 btags += '^' * lb
2025-07-01 05:47:19.110 elif tag == 'delete':
2025-07-01 05:47:19.119 atags += '-' * la
2025-07-01 05:47:19.129 elif tag == 'insert':
2025-07-01 05:47:19.138 btags += '+' * lb
2025-07-01 05:47:19.150 elif tag == 'equal':
2025-07-01 05:47:19.158 atags += ' ' * la
2025-07-01 05:47:19.166 btags += ' ' * lb
2025-07-01 05:47:19.171 else:
2025-07-01 05:47:19.177 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:19.181 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:19.186 else:
2025-07-01 05:47:19.190 # the synch pair is identical
2025-07-01 05:47:19.194 yield ' ' + aelt
2025-07-01 05:47:19.199
2025-07-01 05:47:19.203 # pump out diffs from after the synch point
2025-07-01 05:47:19.207 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:19.212
2025-07-01 05:47:19.216 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:19.220 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:19.225
2025-07-01 05:47:19.230 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:19.237 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:19.246 alo = 221, ahi = 1101
2025-07-01 05:47:19.254 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:19.260 blo = 221, bhi = 1101
2025-07-01 05:47:19.266
2025-07-01 05:47:19.273 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:19.278 g = []
2025-07-01 05:47:19.283 if alo < ahi:
2025-07-01 05:47:19.294 if blo < bhi:
2025-07-01 05:47:19.302 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:19.308 else:
2025-07-01 05:47:19.315 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:19.324 elif blo < bhi:
2025-07-01 05:47:19.332 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:19.339
2025-07-01 05:47:19.344 > yield from g
2025-07-01 05:47:19.350
2025-07-01 05:47:19.355 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:19.361 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:19.367
2025-07-01 05:47:19.376 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:19.382 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:19.388 alo = 221, ahi = 1101
2025-07-01 05:47:19.393 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:19.397 blo = 221, bhi = 1101
2025-07-01 05:47:19.402
2025-07-01 05:47:19.407 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:19.413 r"""
2025-07-01 05:47:19.418 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:19.423 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:19.429 synch point, and intraline difference marking is done on the
2025-07-01 05:47:19.434 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:19.440
2025-07-01 05:47:19.446 Example:
2025-07-01 05:47:19.454
2025-07-01 05:47:19.462 >>> d = Differ()
2025-07-01 05:47:19.469 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:19.474 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:19.480 >>> print(''.join(results), end="")
2025-07-01 05:47:19.485 - abcDefghiJkl
2025-07-01 05:47:19.494 + abcdefGhijkl
2025-07-01 05:47:19.503 """
2025-07-01 05:47:19.507
2025-07-01 05:47:19.514 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:19.520 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:19.525 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:19.537 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:19.546 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:19.554
2025-07-01 05:47:19.562 # search for the pair that matches best without being identical
2025-07-01 05:47:19.569 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:19.575 # on junk -- unless we have to)
2025-07-01 05:47:19.581 for j in range(blo, bhi):
2025-07-01 05:47:19.588 bj = b[j]
2025-07-01 05:47:19.597 cruncher.set_seq2(bj)
2025-07-01 05:47:19.610 for i in range(alo, ahi):
2025-07-01 05:47:19.619 ai = a[i]
2025-07-01 05:47:19.627 if ai == bj:
2025-07-01 05:47:19.640 if eqi is None:
2025-07-01 05:47:19.653 eqi, eqj = i, j
2025-07-01 05:47:19.666 continue
2025-07-01 05:47:19.678 cruncher.set_seq1(ai)
2025-07-01 05:47:19.685 # computing similarity is expensive, so use the quick
2025-07-01 05:47:19.691 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:19.697 # compares by a factor of 3.
2025-07-01 05:47:19.703 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:19.710 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:19.717 # of the computation is cached by cruncher
2025-07-01 05:47:19.724 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:19.731 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:19.738 cruncher.ratio() > best_ratio:
2025-07-01 05:47:19.745 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:19.751 if best_ratio < cutoff:
2025-07-01 05:47:19.759 # no non-identical "pretty close" pair
2025-07-01 05:47:19.770 if eqi is None:
2025-07-01 05:47:19.779 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:19.786 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:19.791 return
2025-07-01 05:47:19.797 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:19.803 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:19.808 else:
2025-07-01 05:47:19.814 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:19.822 eqi = None
2025-07-01 05:47:19.829
2025-07-01 05:47:19.835 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:19.840 # identical
2025-07-01 05:47:19.845
2025-07-01 05:47:19.850 # pump out diffs from before the synch point
2025-07-01 05:47:19.855 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:19.863
2025-07-01 05:47:19.872 # do intraline marking on the synch pair
2025-07-01 05:47:19.879 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:19.887 if eqi is None:
2025-07-01 05:47:19.895 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:19.902 atags = btags = ""
2025-07-01 05:47:19.908 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:19.914 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:19.920 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:19.926 if tag == 'replace':
2025-07-01 05:47:19.931 atags += '^' * la
2025-07-01 05:47:19.939 btags += '^' * lb
2025-07-01 05:47:19.948 elif tag == 'delete':
2025-07-01 05:47:19.960 atags += '-' * la
2025-07-01 05:47:19.969 elif tag == 'insert':
2025-07-01 05:47:19.981 btags += '+' * lb
2025-07-01 05:47:19.991 elif tag == 'equal':
2025-07-01 05:47:19.999 atags += ' ' * la
2025-07-01 05:47:20.005 btags += ' ' * lb
2025-07-01 05:47:20.011 else:
2025-07-01 05:47:20.017 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:20.025 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:20.032 else:
2025-07-01 05:47:20.038 # the synch pair is identical
2025-07-01 05:47:20.044 yield ' ' + aelt
2025-07-01 05:47:20.051
2025-07-01 05:47:20.058 # pump out diffs from after the synch point
2025-07-01 05:47:20.065 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:20.071
2025-07-01 05:47:20.079 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:20.085 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:20.090
2025-07-01 05:47:20.095 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:20.100 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:20.104 alo = 224, ahi = 1101
2025-07-01 05:47:20.110 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:20.118 blo = 224, bhi = 1101
2025-07-01 05:47:20.124
2025-07-01 05:47:20.130 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:20.143 g = []
2025-07-01 05:47:20.153 if alo < ahi:
2025-07-01 05:47:20.161 if blo < bhi:
2025-07-01 05:47:20.172 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:20.182 else:
2025-07-01 05:47:20.192 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:20.202 elif blo < bhi:
2025-07-01 05:47:20.214 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:20.225
2025-07-01 05:47:20.233 > yield from g
2025-07-01 05:47:20.240
2025-07-01 05:47:20.246 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:20.251 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:20.256
2025-07-01 05:47:20.260 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:20.265 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:20.269 alo = 224, ahi = 1101
2025-07-01 05:47:20.274 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:20.279 blo = 224, bhi = 1101
2025-07-01 05:47:20.285
2025-07-01 05:47:20.291 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:20.297 r"""
2025-07-01 05:47:20.303 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:20.309 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:20.313 synch point, and intraline difference marking is done on the
2025-07-01 05:47:20.318 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:20.322
2025-07-01 05:47:20.326 Example:
2025-07-01 05:47:20.331
2025-07-01 05:47:20.335 >>> d = Differ()
2025-07-01 05:47:20.339 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:20.344 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:20.348 >>> print(''.join(results), end="")
2025-07-01 05:47:20.353 - abcDefghiJkl
2025-07-01 05:47:20.361 + abcdefGhijkl
2025-07-01 05:47:20.370 """
2025-07-01 05:47:20.374
2025-07-01 05:47:20.379 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:20.387 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:20.393 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:20.398 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:20.402 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:20.407
2025-07-01 05:47:20.414 # search for the pair that matches best without being identical
2025-07-01 05:47:20.420 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:20.426 # on junk -- unless we have to)
2025-07-01 05:47:20.431 for j in range(blo, bhi):
2025-07-01 05:47:20.435 bj = b[j]
2025-07-01 05:47:20.439 cruncher.set_seq2(bj)
2025-07-01 05:47:20.444 for i in range(alo, ahi):
2025-07-01 05:47:20.448 ai = a[i]
2025-07-01 05:47:20.453 if ai == bj:
2025-07-01 05:47:20.457 if eqi is None:
2025-07-01 05:47:20.461 eqi, eqj = i, j
2025-07-01 05:47:20.466 continue
2025-07-01 05:47:20.470 cruncher.set_seq1(ai)
2025-07-01 05:47:20.474 # computing similarity is expensive, so use the quick
2025-07-01 05:47:20.479 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:20.483 # compares by a factor of 3.
2025-07-01 05:47:20.487 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:20.492 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:20.497 # of the computation is cached by cruncher
2025-07-01 05:47:20.501 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:20.506 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:20.510 cruncher.ratio() > best_ratio:
2025-07-01 05:47:20.514 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:20.519 if best_ratio < cutoff:
2025-07-01 05:47:20.523 # no non-identical "pretty close" pair
2025-07-01 05:47:20.527 if eqi is None:
2025-07-01 05:47:20.532 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:20.536 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:20.540 return
2025-07-01 05:47:20.545 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:20.549 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:20.553 else:
2025-07-01 05:47:20.558 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:20.562 eqi = None
2025-07-01 05:47:20.572
2025-07-01 05:47:20.581 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:20.588 # identical
2025-07-01 05:47:20.595
2025-07-01 05:47:20.601 # pump out diffs from before the synch point
2025-07-01 05:47:20.606 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:20.612
2025-07-01 05:47:20.626 # do intraline marking on the synch pair
2025-07-01 05:47:20.636 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:20.644 if eqi is None:
2025-07-01 05:47:20.650 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:20.660 atags = btags = ""
2025-07-01 05:47:20.671 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:20.679 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:20.687 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:20.697 if tag == 'replace':
2025-07-01 05:47:20.705 atags += '^' * la
2025-07-01 05:47:20.713 btags += '^' * lb
2025-07-01 05:47:20.722 elif tag == 'delete':
2025-07-01 05:47:20.733 atags += '-' * la
2025-07-01 05:47:20.743 elif tag == 'insert':
2025-07-01 05:47:20.752 btags += '+' * lb
2025-07-01 05:47:20.760 elif tag == 'equal':
2025-07-01 05:47:20.766 atags += ' ' * la
2025-07-01 05:47:20.772 btags += ' ' * lb
2025-07-01 05:47:20.778 else:
2025-07-01 05:47:20.787 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:20.800 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:20.810 else:
2025-07-01 05:47:20.817 # the synch pair is identical
2025-07-01 05:47:20.824 yield ' ' + aelt
2025-07-01 05:47:20.830
2025-07-01 05:47:20.840 # pump out diffs from after the synch point
2025-07-01 05:47:20.849 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:20.856
2025-07-01 05:47:20.862 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:20.870 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:20.876
2025-07-01 05:47:20.882 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:20.888 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:20.895 alo = 225, ahi = 1101
2025-07-01 05:47:20.902 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:20.908 blo = 225, bhi = 1101
2025-07-01 05:47:20.915
2025-07-01 05:47:20.926 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:20.936 g = []
2025-07-01 05:47:20.945 if alo < ahi:
2025-07-01 05:47:20.954 if blo < bhi:
2025-07-01 05:47:20.966 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:20.975 else:
2025-07-01 05:47:20.983 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:20.990 elif blo < bhi:
2025-07-01 05:47:20.997 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:21.004
2025-07-01 05:47:21.010 > yield from g
2025-07-01 05:47:21.015
2025-07-01 05:47:21.019 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:21.026 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:21.032
2025-07-01 05:47:21.039 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:21.051 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:21.059 alo = 225, ahi = 1101
2025-07-01 05:47:21.066 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:21.075 blo = 225, bhi = 1101
2025-07-01 05:47:21.085
2025-07-01 05:47:21.096 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:21.107 r"""
2025-07-01 05:47:21.116 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:21.123 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:21.130 synch point, and intraline difference marking is done on the
2025-07-01 05:47:21.136 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:21.143
2025-07-01 05:47:21.153 Example:
2025-07-01 05:47:21.160
2025-07-01 05:47:21.166 >>> d = Differ()
2025-07-01 05:47:21.175 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:21.184 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:21.193 >>> print(''.join(results), end="")
2025-07-01 05:47:21.199 - abcDefghiJkl
2025-07-01 05:47:21.209 + abcdefGhijkl
2025-07-01 05:47:21.219 """
2025-07-01 05:47:21.225
2025-07-01 05:47:21.230 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:21.236 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:21.241 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:21.249 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:21.256 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:21.262
2025-07-01 05:47:21.268 # search for the pair that matches best without being identical
2025-07-01 05:47:21.274 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:21.279 # on junk -- unless we have to)
2025-07-01 05:47:21.285 for j in range(blo, bhi):
2025-07-01 05:47:21.291 bj = b[j]
2025-07-01 05:47:21.297 cruncher.set_seq2(bj)
2025-07-01 05:47:21.304 for i in range(alo, ahi):
2025-07-01 05:47:21.311 ai = a[i]
2025-07-01 05:47:21.318 if ai == bj:
2025-07-01 05:47:21.330 if eqi is None:
2025-07-01 05:47:21.338 eqi, eqj = i, j
2025-07-01 05:47:21.345 continue
2025-07-01 05:47:21.350 cruncher.set_seq1(ai)
2025-07-01 05:47:21.355 # computing similarity is expensive, so use the quick
2025-07-01 05:47:21.360 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:21.364 # compares by a factor of 3.
2025-07-01 05:47:21.370 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:21.376 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:21.380 # of the computation is cached by cruncher
2025-07-01 05:47:21.385 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:21.390 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:21.395 cruncher.ratio() > best_ratio:
2025-07-01 05:47:21.401 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:21.406 if best_ratio < cutoff:
2025-07-01 05:47:21.412 # no non-identical "pretty close" pair
2025-07-01 05:47:21.418 if eqi is None:
2025-07-01 05:47:21.425 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:21.433 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:21.441 return
2025-07-01 05:47:21.446 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:21.451 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:21.455 else:
2025-07-01 05:47:21.460 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:21.466 eqi = None
2025-07-01 05:47:21.475
2025-07-01 05:47:21.485 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:21.490 # identical
2025-07-01 05:47:21.499
2025-07-01 05:47:21.508 # pump out diffs from before the synch point
2025-07-01 05:47:21.515 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:21.523
2025-07-01 05:47:21.531 # do intraline marking on the synch pair
2025-07-01 05:47:21.537 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:21.543 if eqi is None:
2025-07-01 05:47:21.550 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:21.560 atags = btags = ""
2025-07-01 05:47:21.570 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:21.578 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:21.584 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:21.589 if tag == 'replace':
2025-07-01 05:47:21.594 atags += '^' * la
2025-07-01 05:47:21.598 btags += '^' * lb
2025-07-01 05:47:21.603 elif tag == 'delete':
2025-07-01 05:47:21.607 atags += '-' * la
2025-07-01 05:47:21.612 elif tag == 'insert':
2025-07-01 05:47:21.616 btags += '+' * lb
2025-07-01 05:47:21.621 elif tag == 'equal':
2025-07-01 05:47:21.626 atags += ' ' * la
2025-07-01 05:47:21.630 btags += ' ' * lb
2025-07-01 05:47:21.642 else:
2025-07-01 05:47:21.652 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:21.658 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:21.664 else:
2025-07-01 05:47:21.670 # the synch pair is identical
2025-07-01 05:47:21.675 yield ' ' + aelt
2025-07-01 05:47:21.679
2025-07-01 05:47:21.684 # pump out diffs from after the synch point
2025-07-01 05:47:21.689 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:21.694
2025-07-01 05:47:21.698 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:21.703 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:21.708
2025-07-01 05:47:21.713 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:21.719 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:21.725 alo = 226, ahi = 1101
2025-07-01 05:47:21.735 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:21.743 blo = 226, bhi = 1101
2025-07-01 05:47:21.750
2025-07-01 05:47:21.757 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:21.763 g = []
2025-07-01 05:47:21.771 if alo < ahi:
2025-07-01 05:47:21.782 if blo < bhi:
2025-07-01 05:47:21.793 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:21.800 else:
2025-07-01 05:47:21.807 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:21.814 elif blo < bhi:
2025-07-01 05:47:21.823 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:21.834
2025-07-01 05:47:21.844 > yield from g
2025-07-01 05:47:21.852
2025-07-01 05:47:21.861 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:21.872 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:21.882
2025-07-01 05:47:21.891 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:21.902 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:21.910 alo = 226, ahi = 1101
2025-07-01 05:47:21.923 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:21.934 blo = 226, bhi = 1101
2025-07-01 05:47:21.943
2025-07-01 05:47:21.951 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:21.957 r"""
2025-07-01 05:47:21.964 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:21.969 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:21.975 synch point, and intraline difference marking is done on the
2025-07-01 05:47:21.983 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:21.992
2025-07-01 05:47:22.001 Example:
2025-07-01 05:47:22.008
2025-07-01 05:47:22.017 >>> d = Differ()
2025-07-01 05:47:22.025 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:22.032 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:22.038 >>> print(''.join(results), end="")
2025-07-01 05:47:22.044 - abcDefghiJkl
2025-07-01 05:47:22.055 + abcdefGhijkl
2025-07-01 05:47:22.066 """
2025-07-01 05:47:22.071
2025-07-01 05:47:22.076 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:22.082 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:22.086 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:22.092 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:22.097 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:22.103
2025-07-01 05:47:22.108 # search for the pair that matches best without being identical
2025-07-01 05:47:22.114 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:22.128 # on junk -- unless we have to)
2025-07-01 05:47:22.140 for j in range(blo, bhi):
2025-07-01 05:47:22.151 bj = b[j]
2025-07-01 05:47:22.163 cruncher.set_seq2(bj)
2025-07-01 05:47:22.173 for i in range(alo, ahi):
2025-07-01 05:47:22.180 ai = a[i]
2025-07-01 05:47:22.187 if ai == bj:
2025-07-01 05:47:22.194 if eqi is None:
2025-07-01 05:47:22.201 eqi, eqj = i, j
2025-07-01 05:47:22.207 continue
2025-07-01 05:47:22.216 cruncher.set_seq1(ai)
2025-07-01 05:47:22.225 # computing similarity is expensive, so use the quick
2025-07-01 05:47:22.231 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:22.237 # compares by a factor of 3.
2025-07-01 05:47:22.242 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:22.247 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:22.253 # of the computation is cached by cruncher
2025-07-01 05:47:22.259 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:22.265 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:22.271 cruncher.ratio() > best_ratio:
2025-07-01 05:47:22.276 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:22.282 if best_ratio < cutoff:
2025-07-01 05:47:22.288 # no non-identical "pretty close" pair
2025-07-01 05:47:22.295 if eqi is None:
2025-07-01 05:47:22.302 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:22.310 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:22.317 return
2025-07-01 05:47:22.327 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:22.338 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:22.345 else:
2025-07-01 05:47:22.354 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:22.360 eqi = None
2025-07-01 05:47:22.367
2025-07-01 05:47:22.373 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:22.380 # identical
2025-07-01 05:47:22.387
2025-07-01 05:47:22.401 # pump out diffs from before the synch point
2025-07-01 05:47:22.413 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:22.422
2025-07-01 05:47:22.430 # do intraline marking on the synch pair
2025-07-01 05:47:22.436 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:22.443 if eqi is None:
2025-07-01 05:47:22.455 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:22.466 atags = btags = ""
2025-07-01 05:47:22.473 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:22.480 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:22.494 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:22.507 if tag == 'replace':
2025-07-01 05:47:22.518 atags += '^' * la
2025-07-01 05:47:22.530 btags += '^' * lb
2025-07-01 05:47:22.541 elif tag == 'delete':
2025-07-01 05:47:22.553 atags += '-' * la
2025-07-01 05:47:22.564 elif tag == 'insert':
2025-07-01 05:47:22.574 btags += '+' * lb
2025-07-01 05:47:22.582 elif tag == 'equal':
2025-07-01 05:47:22.593 atags += ' ' * la
2025-07-01 05:47:22.603 btags += ' ' * lb
2025-07-01 05:47:22.615 else:
2025-07-01 05:47:22.626 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:22.640 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:22.649 else:
2025-07-01 05:47:22.661 # the synch pair is identical
2025-07-01 05:47:22.672 yield ' ' + aelt
2025-07-01 05:47:22.686
2025-07-01 05:47:22.698 # pump out diffs from after the synch point
2025-07-01 05:47:22.709 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:22.717
2025-07-01 05:47:22.727 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:22.739 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:22.748
2025-07-01 05:47:22.756 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:22.765 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:22.770 alo = 227, ahi = 1101
2025-07-01 05:47:22.778 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:22.789 blo = 227, bhi = 1101
2025-07-01 05:47:22.801
2025-07-01 05:47:22.810 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:22.821 g = []
2025-07-01 05:47:22.832 if alo < ahi:
2025-07-01 05:47:22.840 if blo < bhi:
2025-07-01 05:47:22.846 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:22.851 else:
2025-07-01 05:47:22.856 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:22.864 elif blo < bhi:
2025-07-01 05:47:22.872 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:22.878
2025-07-01 05:47:22.884 > yield from g
2025-07-01 05:47:22.891
2025-07-01 05:47:22.897 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:22.910 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:22.922
2025-07-01 05:47:22.932 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:22.939 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:22.949 alo = 227, ahi = 1101
2025-07-01 05:47:22.956 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:22.963 blo = 227, bhi = 1101
2025-07-01 05:47:22.968
2025-07-01 05:47:22.973 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:22.977 r"""
2025-07-01 05:47:22.982 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:22.986 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:22.991 synch point, and intraline difference marking is done on the
2025-07-01 05:47:22.997 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:23.002
2025-07-01 05:47:23.008 Example:
2025-07-01 05:47:23.028
2025-07-01 05:47:23.038 >>> d = Differ()
2025-07-01 05:47:23.047 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:23.056 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:23.063 >>> print(''.join(results), end="")
2025-07-01 05:47:23.071 - abcDefghiJkl
2025-07-01 05:47:23.093 + abcdefGhijkl
2025-07-01 05:47:23.116 """
2025-07-01 05:47:23.123
2025-07-01 05:47:23.131 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:23.138 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:23.149 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:23.158 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:23.169 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:23.182
2025-07-01 05:47:23.195 # search for the pair that matches best without being identical
2025-07-01 05:47:23.205 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:23.217 # on junk -- unless we have to)
2025-07-01 05:47:23.227 for j in range(blo, bhi):
2025-07-01 05:47:23.235 bj = b[j]
2025-07-01 05:47:23.246 cruncher.set_seq2(bj)
2025-07-01 05:47:23.257 for i in range(alo, ahi):
2025-07-01 05:47:23.265 ai = a[i]
2025-07-01 05:47:23.273 if ai == bj:
2025-07-01 05:47:23.279 if eqi is None:
2025-07-01 05:47:23.286 eqi, eqj = i, j
2025-07-01 05:47:23.291 continue
2025-07-01 05:47:23.299 cruncher.set_seq1(ai)
2025-07-01 05:47:23.309 # computing similarity is expensive, so use the quick
2025-07-01 05:47:23.317 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:23.324 # compares by a factor of 3.
2025-07-01 05:47:23.336 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:23.347 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:23.357 # of the computation is cached by cruncher
2025-07-01 05:47:23.365 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:23.372 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:23.378 cruncher.ratio() > best_ratio:
2025-07-01 05:47:23.384 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:23.390 if best_ratio < cutoff:
2025-07-01 05:47:23.397 # no non-identical "pretty close" pair
2025-07-01 05:47:23.404 if eqi is None:
2025-07-01 05:47:23.412 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:23.418 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:23.424 return
2025-07-01 05:47:23.437 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:23.449 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:23.459 else:
2025-07-01 05:47:23.468 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:23.475 eqi = None
2025-07-01 05:47:23.483
2025-07-01 05:47:23.493 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:23.501 # identical
2025-07-01 05:47:23.510
2025-07-01 05:47:23.518 # pump out diffs from before the synch point
2025-07-01 05:47:23.530 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:23.540
2025-07-01 05:47:23.552 # do intraline marking on the synch pair
2025-07-01 05:47:23.564 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:23.577 if eqi is None:
2025-07-01 05:47:23.590 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:23.600 atags = btags = ""
2025-07-01 05:47:23.607 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:23.621 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:23.633 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:23.643 if tag == 'replace':
2025-07-01 05:47:23.650 atags += '^' * la
2025-07-01 05:47:23.659 btags += '^' * lb
2025-07-01 05:47:23.670 elif tag == 'delete':
2025-07-01 05:47:23.678 atags += '-' * la
2025-07-01 05:47:23.690 elif tag == 'insert':
2025-07-01 05:47:23.702 btags += '+' * lb
2025-07-01 05:47:23.714 elif tag == 'equal':
2025-07-01 05:47:23.725 atags += ' ' * la
2025-07-01 05:47:23.733 btags += ' ' * lb
2025-07-01 05:47:23.740 else:
2025-07-01 05:47:23.751 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:23.763 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:23.773 else:
2025-07-01 05:47:23.781 # the synch pair is identical
2025-07-01 05:47:23.788 yield ' ' + aelt
2025-07-01 05:47:23.795
2025-07-01 05:47:23.802 # pump out diffs from after the synch point
2025-07-01 05:47:23.811 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:23.820
2025-07-01 05:47:23.827 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:23.839 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:23.849
2025-07-01 05:47:23.857 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:23.865 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:23.870 alo = 228, ahi = 1101
2025-07-01 05:47:23.876 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:23.882 blo = 228, bhi = 1101
2025-07-01 05:47:23.887
2025-07-01 05:47:23.892 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:23.899 g = []
2025-07-01 05:47:23.906 if alo < ahi:
2025-07-01 05:47:23.917 if blo < bhi:
2025-07-01 05:47:23.928 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:23.939 else:
2025-07-01 05:47:23.950 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:23.963 elif blo < bhi:
2025-07-01 05:47:23.973 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:23.981
2025-07-01 05:47:23.989 > yield from g
2025-07-01 05:47:23.998
2025-07-01 05:47:24.013 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:24.026 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:24.034
2025-07-01 05:47:24.042 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:24.052 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:24.059 alo = 228, ahi = 1101
2025-07-01 05:47:24.076 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:24.088 blo = 228, bhi = 1101
2025-07-01 05:47:24.098
2025-07-01 05:47:24.107 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:24.116 r"""
2025-07-01 05:47:24.128 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:24.138 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:24.147 synch point, and intraline difference marking is done on the
2025-07-01 05:47:24.159 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:24.169
2025-07-01 05:47:24.176 Example:
2025-07-01 05:47:24.181
2025-07-01 05:47:24.187 >>> d = Differ()
2025-07-01 05:47:24.193 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:24.199 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:24.207 >>> print(''.join(results), end="")
2025-07-01 05:47:24.215 - abcDefghiJkl
2025-07-01 05:47:24.230 + abcdefGhijkl
2025-07-01 05:47:24.243 """
2025-07-01 05:47:24.249
2025-07-01 05:47:24.255 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:24.261 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:24.267 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:24.275 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:24.286 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:24.297
2025-07-01 05:47:24.309 # search for the pair that matches best without being identical
2025-07-01 05:47:24.317 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:24.324 # on junk -- unless we have to)
2025-07-01 05:47:24.330 for j in range(blo, bhi):
2025-07-01 05:47:24.335 bj = b[j]
2025-07-01 05:47:24.341 cruncher.set_seq2(bj)
2025-07-01 05:47:24.347 for i in range(alo, ahi):
2025-07-01 05:47:24.352 ai = a[i]
2025-07-01 05:47:24.358 if ai == bj:
2025-07-01 05:47:24.371 if eqi is None:
2025-07-01 05:47:24.379 eqi, eqj = i, j
2025-07-01 05:47:24.387 continue
2025-07-01 05:47:24.396 cruncher.set_seq1(ai)
2025-07-01 05:47:24.405 # computing similarity is expensive, so use the quick
2025-07-01 05:47:24.412 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:24.417 # compares by a factor of 3.
2025-07-01 05:47:24.422 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:24.426 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:24.432 # of the computation is cached by cruncher
2025-07-01 05:47:24.438 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:24.445 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:24.450 cruncher.ratio() > best_ratio:
2025-07-01 05:47:24.460 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:24.469 if best_ratio < cutoff:
2025-07-01 05:47:24.477 # no non-identical "pretty close" pair
2025-07-01 05:47:24.487 if eqi is None:
2025-07-01 05:47:24.498 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:24.508 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:24.519 return
2025-07-01 05:47:24.528 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:24.536 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:24.542 else:
2025-07-01 05:47:24.549 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:24.555 eqi = None
2025-07-01 05:47:24.560
2025-07-01 05:47:24.567 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:24.572 # identical
2025-07-01 05:47:24.578
2025-07-01 05:47:24.584 # pump out diffs from before the synch point
2025-07-01 05:47:24.590 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:24.595
2025-07-01 05:47:24.603 # do intraline marking on the synch pair
2025-07-01 05:47:24.614 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:24.623 if eqi is None:
2025-07-01 05:47:24.630 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:24.638 atags = btags = ""
2025-07-01 05:47:24.646 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:24.656 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:24.666 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:24.675 if tag == 'replace':
2025-07-01 05:47:24.683 atags += '^' * la
2025-07-01 05:47:24.690 btags += '^' * lb
2025-07-01 05:47:24.701 elif tag == 'delete':
2025-07-01 05:47:24.709 atags += '-' * la
2025-07-01 05:47:24.717 elif tag == 'insert':
2025-07-01 05:47:24.724 btags += '+' * lb
2025-07-01 05:47:24.730 elif tag == 'equal':
2025-07-01 05:47:24.743 atags += ' ' * la
2025-07-01 05:47:24.754 btags += ' ' * lb
2025-07-01 05:47:24.763 else:
2025-07-01 05:47:24.770 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:24.779 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:24.789 else:
2025-07-01 05:47:24.797 # the synch pair is identical
2025-07-01 05:47:24.807 yield ' ' + aelt
2025-07-01 05:47:24.819
2025-07-01 05:47:24.830 # pump out diffs from after the synch point
2025-07-01 05:47:24.840 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:24.847
2025-07-01 05:47:24.854 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:24.861 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:24.866
2025-07-01 05:47:24.872 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:24.879 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:24.887 alo = 229, ahi = 1101
2025-07-01 05:47:24.898 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:24.906 blo = 229, bhi = 1101
2025-07-01 05:47:24.915
2025-07-01 05:47:24.927 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:24.936 g = []
2025-07-01 05:47:24.943 if alo < ahi:
2025-07-01 05:47:24.953 if blo < bhi:
2025-07-01 05:47:24.962 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:24.970 else:
2025-07-01 05:47:24.977 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:24.983 elif blo < bhi:
2025-07-01 05:47:24.991 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:25.002
2025-07-01 05:47:25.010 > yield from g
2025-07-01 05:47:25.017
2025-07-01 05:47:25.028 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:25.038 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:25.045
2025-07-01 05:47:25.056 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:25.066 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:25.073 alo = 229, ahi = 1101
2025-07-01 05:47:25.083 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:25.092 blo = 229, bhi = 1101
2025-07-01 05:47:25.099
2025-07-01 05:47:25.105 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:25.110 r"""
2025-07-01 05:47:25.115 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:25.120 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:25.126 synch point, and intraline difference marking is done on the
2025-07-01 05:47:25.136 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:25.146
2025-07-01 05:47:25.153 Example:
2025-07-01 05:47:25.159
2025-07-01 05:47:25.170 >>> d = Differ()
2025-07-01 05:47:25.178 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:25.184 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:25.190 >>> print(''.join(results), end="")
2025-07-01 05:47:25.195 - abcDefghiJkl
2025-07-01 05:47:25.208 + abcdefGhijkl
2025-07-01 05:47:25.225 """
2025-07-01 05:47:25.231
2025-07-01 05:47:25.238 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:25.246 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:25.257 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:25.266 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:25.273 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:25.279
2025-07-01 05:47:25.285 # search for the pair that matches best without being identical
2025-07-01 05:47:25.289 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:25.298 # on junk -- unless we have to)
2025-07-01 05:47:25.307 for j in range(blo, bhi):
2025-07-01 05:47:25.314 bj = b[j]
2025-07-01 05:47:25.321 cruncher.set_seq2(bj)
2025-07-01 05:47:25.326 for i in range(alo, ahi):
2025-07-01 05:47:25.331 ai = a[i]
2025-07-01 05:47:25.335 if ai == bj:
2025-07-01 05:47:25.339 if eqi is None:
2025-07-01 05:47:25.344 eqi, eqj = i, j
2025-07-01 05:47:25.348 continue
2025-07-01 05:47:25.352 cruncher.set_seq1(ai)
2025-07-01 05:47:25.357 # computing similarity is expensive, so use the quick
2025-07-01 05:47:25.361 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:25.366 # compares by a factor of 3.
2025-07-01 05:47:25.370 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:25.375 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:25.379 # of the computation is cached by cruncher
2025-07-01 05:47:25.384 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:25.388 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:25.393 cruncher.ratio() > best_ratio:
2025-07-01 05:47:25.397 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:25.402 if best_ratio < cutoff:
2025-07-01 05:47:25.407 # no non-identical "pretty close" pair
2025-07-01 05:47:25.412 if eqi is None:
2025-07-01 05:47:25.418 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:25.423 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:25.435 return
2025-07-01 05:47:25.447 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:25.458 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:25.469 else:
2025-07-01 05:47:25.479 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:25.489 eqi = None
2025-07-01 05:47:25.498
2025-07-01 05:47:25.507 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:25.518 # identical
2025-07-01 05:47:25.531
2025-07-01 05:47:25.541 # pump out diffs from before the synch point
2025-07-01 05:47:25.554 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:25.564
2025-07-01 05:47:25.571 # do intraline marking on the synch pair
2025-07-01 05:47:25.581 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:25.588 if eqi is None:
2025-07-01 05:47:25.597 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:25.603 atags = btags = ""
2025-07-01 05:47:25.610 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:25.616 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:25.622 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:25.628 if tag == 'replace':
2025-07-01 05:47:25.636 atags += '^' * la
2025-07-01 05:47:25.649 btags += '^' * lb
2025-07-01 05:47:25.656 elif tag == 'delete':
2025-07-01 05:47:25.664 atags += '-' * la
2025-07-01 05:47:25.670 elif tag == 'insert':
2025-07-01 05:47:25.677 btags += '+' * lb
2025-07-01 05:47:25.683 elif tag == 'equal':
2025-07-01 05:47:25.691 atags += ' ' * la
2025-07-01 05:47:25.702 btags += ' ' * lb
2025-07-01 05:47:25.710 else:
2025-07-01 05:47:25.716 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:25.722 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:25.730 else:
2025-07-01 05:47:25.743 # the synch pair is identical
2025-07-01 05:47:25.754 yield ' ' + aelt
2025-07-01 05:47:25.765
2025-07-01 05:47:25.779 # pump out diffs from after the synch point
2025-07-01 05:47:25.790 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:25.798
2025-07-01 05:47:25.805 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:25.811 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:25.817
2025-07-01 05:47:25.823 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:25.829 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:25.835 alo = 230, ahi = 1101
2025-07-01 05:47:25.841 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:25.848 blo = 230, bhi = 1101
2025-07-01 05:47:25.855
2025-07-01 05:47:25.860 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:25.866 g = []
2025-07-01 05:47:25.872 if alo < ahi:
2025-07-01 05:47:25.878 if blo < bhi:
2025-07-01 05:47:25.887 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:25.896 else:
2025-07-01 05:47:25.903 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:25.909 elif blo < bhi:
2025-07-01 05:47:25.916 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:25.923
2025-07-01 05:47:25.931 > yield from g
2025-07-01 05:47:25.942
2025-07-01 05:47:25.949 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:25.955 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:25.961
2025-07-01 05:47:25.966 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:25.975 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:25.981 alo = 230, ahi = 1101
2025-07-01 05:47:25.994 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:26.004 blo = 230, bhi = 1101
2025-07-01 05:47:26.012
2025-07-01 05:47:26.019 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:26.025 r"""
2025-07-01 05:47:26.031 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:26.038 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:26.044 synch point, and intraline difference marking is done on the
2025-07-01 05:47:26.051 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:26.062
2025-07-01 05:47:26.073 Example:
2025-07-01 05:47:26.085
2025-07-01 05:47:26.096 >>> d = Differ()
2025-07-01 05:47:26.106 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:26.117 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:26.128 >>> print(''.join(results), end="")
2025-07-01 05:47:26.137 - abcDefghiJkl
2025-07-01 05:47:26.159 + abcdefGhijkl
2025-07-01 05:47:26.175 """
2025-07-01 05:47:26.182
2025-07-01 05:47:26.189 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:26.195 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:26.201 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:26.211 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:26.220 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:26.227
2025-07-01 05:47:26.234 # search for the pair that matches best without being identical
2025-07-01 05:47:26.240 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:26.247 # on junk -- unless we have to)
2025-07-01 05:47:26.258 for j in range(blo, bhi):
2025-07-01 05:47:26.265 bj = b[j]
2025-07-01 05:47:26.273 cruncher.set_seq2(bj)
2025-07-01 05:47:26.280 for i in range(alo, ahi):
2025-07-01 05:47:26.287 ai = a[i]
2025-07-01 05:47:26.291 if ai == bj:
2025-07-01 05:47:26.296 if eqi is None:
2025-07-01 05:47:26.300 eqi, eqj = i, j
2025-07-01 05:47:26.304 continue
2025-07-01 05:47:26.309 cruncher.set_seq1(ai)
2025-07-01 05:47:26.313 # computing similarity is expensive, so use the quick
2025-07-01 05:47:26.318 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:26.323 # compares by a factor of 3.
2025-07-01 05:47:26.334 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:26.344 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:26.355 # of the computation is cached by cruncher
2025-07-01 05:47:26.362 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:26.368 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:26.374 cruncher.ratio() > best_ratio:
2025-07-01 05:47:26.381 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:26.385 if best_ratio < cutoff:
2025-07-01 05:47:26.390 # no non-identical "pretty close" pair
2025-07-01 05:47:26.396 if eqi is None:
2025-07-01 05:47:26.403 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:26.409 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:26.415 return
2025-07-01 05:47:26.424 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:26.432 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:26.439 else:
2025-07-01 05:47:26.445 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:26.449 eqi = None
2025-07-01 05:47:26.454
2025-07-01 05:47:26.459 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:26.464 # identical
2025-07-01 05:47:26.470
2025-07-01 05:47:26.480 # pump out diffs from before the synch point
2025-07-01 05:47:26.491 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:26.501
2025-07-01 05:47:26.512 # do intraline marking on the synch pair
2025-07-01 05:47:26.525 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:26.533 if eqi is None:
2025-07-01 05:47:26.540 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:26.549 atags = btags = ""
2025-07-01 05:47:26.557 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:26.562 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:26.567 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:26.572 if tag == 'replace':
2025-07-01 05:47:26.577 atags += '^' * la
2025-07-01 05:47:26.582 btags += '^' * lb
2025-07-01 05:47:26.587 elif tag == 'delete':
2025-07-01 05:47:26.591 atags += '-' * la
2025-07-01 05:47:26.596 elif tag == 'insert':
2025-07-01 05:47:26.601 btags += '+' * lb
2025-07-01 05:47:26.607 elif tag == 'equal':
2025-07-01 05:47:26.611 atags += ' ' * la
2025-07-01 05:47:26.616 btags += ' ' * lb
2025-07-01 05:47:26.621 else:
2025-07-01 05:47:26.625 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:26.630 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:26.634 else:
2025-07-01 05:47:26.638 # the synch pair is identical
2025-07-01 05:47:26.643 yield ' ' + aelt
2025-07-01 05:47:26.648
2025-07-01 05:47:26.653 # pump out diffs from after the synch point
2025-07-01 05:47:26.658 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:26.662
2025-07-01 05:47:26.666 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:26.671 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:26.675
2025-07-01 05:47:26.680 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:26.685 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:26.690 alo = 231, ahi = 1101
2025-07-01 05:47:26.694 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:26.699 blo = 231, bhi = 1101
2025-07-01 05:47:26.703
2025-07-01 05:47:26.707 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:26.711 g = []
2025-07-01 05:47:26.716 if alo < ahi:
2025-07-01 05:47:26.720 if blo < bhi:
2025-07-01 05:47:26.724 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:26.729 else:
2025-07-01 05:47:26.733 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:26.737 elif blo < bhi:
2025-07-01 05:47:26.742 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:26.746
2025-07-01 05:47:26.750 > yield from g
2025-07-01 05:47:26.755
2025-07-01 05:47:26.759 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:26.763 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:26.768
2025-07-01 05:47:26.772 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:26.777 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:26.781 alo = 231, ahi = 1101
2025-07-01 05:47:26.786 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:26.791 blo = 231, bhi = 1101
2025-07-01 05:47:26.796
2025-07-01 05:47:26.801 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:26.805 r"""
2025-07-01 05:47:26.809 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:26.814 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:26.818 synch point, and intraline difference marking is done on the
2025-07-01 05:47:26.823 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:26.827
2025-07-01 05:47:26.831 Example:
2025-07-01 05:47:26.835
2025-07-01 05:47:26.840 >>> d = Differ()
2025-07-01 05:47:26.844 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:26.848 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:26.853 >>> print(''.join(results), end="")
2025-07-01 05:47:26.858 - abcDefghiJkl
2025-07-01 05:47:26.870 + abcdefGhijkl
2025-07-01 05:47:26.882 """
2025-07-01 05:47:26.887
2025-07-01 05:47:26.896 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:26.907 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:26.915 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:26.923 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:26.930 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:26.936
2025-07-01 05:47:26.942 # search for the pair that matches best without being identical
2025-07-01 05:47:26.947 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:26.952 # on junk -- unless we have to)
2025-07-01 05:47:26.957 for j in range(blo, bhi):
2025-07-01 05:47:26.963 bj = b[j]
2025-07-01 05:47:26.970 cruncher.set_seq2(bj)
2025-07-01 05:47:26.980 for i in range(alo, ahi):
2025-07-01 05:47:26.989 ai = a[i]
2025-07-01 05:47:26.998 if ai == bj:
2025-07-01 05:47:27.007 if eqi is None:
2025-07-01 05:47:27.017 eqi, eqj = i, j
2025-07-01 05:47:27.027 continue
2025-07-01 05:47:27.035 cruncher.set_seq1(ai)
2025-07-01 05:47:27.047 # computing similarity is expensive, so use the quick
2025-07-01 05:47:27.057 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:27.069 # compares by a factor of 3.
2025-07-01 05:47:27.078 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:27.087 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:27.094 # of the computation is cached by cruncher
2025-07-01 05:47:27.104 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:27.115 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:27.124 cruncher.ratio() > best_ratio:
2025-07-01 05:47:27.138 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:27.148 if best_ratio < cutoff:
2025-07-01 05:47:27.160 # no non-identical "pretty close" pair
2025-07-01 05:47:27.170 if eqi is None:
2025-07-01 05:47:27.180 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:27.189 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:27.196 return
2025-07-01 05:47:27.202 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:27.212 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:27.222 else:
2025-07-01 05:47:27.234 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:27.243 eqi = None
2025-07-01 05:47:27.253
2025-07-01 05:47:27.263 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:27.272 # identical
2025-07-01 05:47:27.279
2025-07-01 05:47:27.287 # pump out diffs from before the synch point
2025-07-01 05:47:27.297 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:27.309
2025-07-01 05:47:27.320 # do intraline marking on the synch pair
2025-07-01 05:47:27.329 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:27.336 if eqi is None:
2025-07-01 05:47:27.343 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:27.349 atags = btags = ""
2025-07-01 05:47:27.355 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:27.362 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:27.369 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:27.376 if tag == 'replace':
2025-07-01 05:47:27.382 atags += '^' * la
2025-07-01 05:47:27.389 btags += '^' * lb
2025-07-01 05:47:27.396 elif tag == 'delete':
2025-07-01 05:47:27.403 atags += '-' * la
2025-07-01 05:47:27.411 elif tag == 'insert':
2025-07-01 05:47:27.423 btags += '+' * lb
2025-07-01 05:47:27.433 elif tag == 'equal':
2025-07-01 05:47:27.440 atags += ' ' * la
2025-07-01 05:47:27.446 btags += ' ' * lb
2025-07-01 05:47:27.450 else:
2025-07-01 05:47:27.456 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:27.462 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:27.467 else:
2025-07-01 05:47:27.473 # the synch pair is identical
2025-07-01 05:47:27.479 yield ' ' + aelt
2025-07-01 05:47:27.485
2025-07-01 05:47:27.491 # pump out diffs from after the synch point
2025-07-01 05:47:27.498 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:27.505
2025-07-01 05:47:27.511 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:27.518 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:27.524
2025-07-01 05:47:27.530 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:27.536 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:27.542 alo = 232, ahi = 1101
2025-07-01 05:47:27.552 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:27.559 blo = 232, bhi = 1101
2025-07-01 05:47:27.565
2025-07-01 05:47:27.570 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:27.575 g = []
2025-07-01 05:47:27.579 if alo < ahi:
2025-07-01 05:47:27.584 if blo < bhi:
2025-07-01 05:47:27.590 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:27.595 else:
2025-07-01 05:47:27.601 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:27.607 elif blo < bhi:
2025-07-01 05:47:27.613 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:27.619
2025-07-01 05:47:27.626 > yield from g
2025-07-01 05:47:27.632
2025-07-01 05:47:27.638 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:27.644 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:27.650
2025-07-01 05:47:27.657 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:27.669 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:27.678 alo = 232, ahi = 1101
2025-07-01 05:47:27.685 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:27.691 blo = 232, bhi = 1101
2025-07-01 05:47:27.697
2025-07-01 05:47:27.704 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:27.712 r"""
2025-07-01 05:47:27.723 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:27.733 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:27.740 synch point, and intraline difference marking is done on the
2025-07-01 05:47:27.746 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:27.752
2025-07-01 05:47:27.758 Example:
2025-07-01 05:47:27.764
2025-07-01 05:47:27.771 >>> d = Differ()
2025-07-01 05:47:27.778 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:27.785 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:27.793 >>> print(''.join(results), end="")
2025-07-01 05:47:27.799 - abcDefghiJkl
2025-07-01 05:47:27.814 + abcdefGhijkl
2025-07-01 05:47:27.827 """
2025-07-01 05:47:27.835
2025-07-01 05:47:27.843 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:27.850 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:27.856 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:27.862 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:27.869 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:27.875
2025-07-01 05:47:27.883 # search for the pair that matches best without being identical
2025-07-01 05:47:27.894 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:27.905 # on junk -- unless we have to)
2025-07-01 05:47:27.914 for j in range(blo, bhi):
2025-07-01 05:47:27.924 bj = b[j]
2025-07-01 05:47:27.932 cruncher.set_seq2(bj)
2025-07-01 05:47:27.942 for i in range(alo, ahi):
2025-07-01 05:47:27.954 ai = a[i]
2025-07-01 05:47:27.967 if ai == bj:
2025-07-01 05:47:27.981 if eqi is None:
2025-07-01 05:47:27.991 eqi, eqj = i, j
2025-07-01 05:47:27.996 continue
2025-07-01 05:47:28.004 cruncher.set_seq1(ai)
2025-07-01 05:47:28.011 # computing similarity is expensive, so use the quick
2025-07-01 05:47:28.016 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:28.023 # compares by a factor of 3.
2025-07-01 05:47:28.029 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:28.034 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:28.040 # of the computation is cached by cruncher
2025-07-01 05:47:28.046 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:28.051 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:28.058 cruncher.ratio() > best_ratio:
2025-07-01 05:47:28.068 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:28.079 if best_ratio < cutoff:
2025-07-01 05:47:28.089 # no non-identical "pretty close" pair
2025-07-01 05:47:28.097 if eqi is None:
2025-07-01 05:47:28.105 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:28.113 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:28.120 return
2025-07-01 05:47:28.127 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:28.133 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:28.139 else:
2025-07-01 05:47:28.146 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:28.153 eqi = None
2025-07-01 05:47:28.160
2025-07-01 05:47:28.168 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:28.175 # identical
2025-07-01 05:47:28.181
2025-07-01 05:47:28.188 # pump out diffs from before the synch point
2025-07-01 05:47:28.196 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:28.203
2025-07-01 05:47:28.209 # do intraline marking on the synch pair
2025-07-01 05:47:28.215 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:28.221 if eqi is None:
2025-07-01 05:47:28.227 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:28.233 atags = btags = ""
2025-07-01 05:47:28.240 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:28.247 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:28.255 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:28.264 if tag == 'replace':
2025-07-01 05:47:28.271 atags += '^' * la
2025-07-01 05:47:28.277 btags += '^' * lb
2025-07-01 05:47:28.289 elif tag == 'delete':
2025-07-01 05:47:28.299 atags += '-' * la
2025-07-01 05:47:28.305 elif tag == 'insert':
2025-07-01 05:47:28.311 btags += '+' * lb
2025-07-01 05:47:28.318 elif tag == 'equal':
2025-07-01 05:47:28.329 atags += ' ' * la
2025-07-01 05:47:28.338 btags += ' ' * lb
2025-07-01 05:47:28.345 else:
2025-07-01 05:47:28.351 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:28.357 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:28.362 else:
2025-07-01 05:47:28.374 # the synch pair is identical
2025-07-01 05:47:28.385 yield ' ' + aelt
2025-07-01 05:47:28.391
2025-07-01 05:47:28.397 # pump out diffs from after the synch point
2025-07-01 05:47:28.404 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:28.411
2025-07-01 05:47:28.417 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:28.423 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:28.430
2025-07-01 05:47:28.437 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:28.449 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:28.456 alo = 233, ahi = 1101
2025-07-01 05:47:28.464 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:28.471 blo = 233, bhi = 1101
2025-07-01 05:47:28.479
2025-07-01 05:47:28.489 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:28.497 g = []
2025-07-01 05:47:28.504 if alo < ahi:
2025-07-01 05:47:28.511 if blo < bhi:
2025-07-01 05:47:28.518 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:28.525 else:
2025-07-01 05:47:28.531 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:28.538 elif blo < bhi:
2025-07-01 05:47:28.549 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:28.558
2025-07-01 05:47:28.565 > yield from g
2025-07-01 05:47:28.571
2025-07-01 05:47:28.576 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:28.581 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:28.586
2025-07-01 05:47:28.592 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:28.598 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:28.603 alo = 233, ahi = 1101
2025-07-01 05:47:28.610 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:28.621 blo = 233, bhi = 1101
2025-07-01 05:47:28.629
2025-07-01 05:47:28.639 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:28.651 r"""
2025-07-01 05:47:28.659 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:28.666 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:28.672 synch point, and intraline difference marking is done on the
2025-07-01 05:47:28.678 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:28.684
2025-07-01 05:47:28.690 Example:
2025-07-01 05:47:28.695
2025-07-01 05:47:28.701 >>> d = Differ()
2025-07-01 05:47:28.708 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:28.715 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:28.723 >>> print(''.join(results), end="")
2025-07-01 05:47:28.734 - abcDefghiJkl
2025-07-01 05:47:28.751 + abcdefGhijkl
2025-07-01 05:47:28.767 """
2025-07-01 05:47:28.773
2025-07-01 05:47:28.778 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:28.783 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:28.788 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:28.792 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:28.797 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:28.803
2025-07-01 05:47:28.809 # search for the pair that matches best without being identical
2025-07-01 05:47:28.814 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:28.822 # on junk -- unless we have to)
2025-07-01 05:47:28.829 for j in range(blo, bhi):
2025-07-01 05:47:28.835 bj = b[j]
2025-07-01 05:47:28.841 cruncher.set_seq2(bj)
2025-07-01 05:47:28.847 for i in range(alo, ahi):
2025-07-01 05:47:28.852 ai = a[i]
2025-07-01 05:47:28.857 if ai == bj:
2025-07-01 05:47:28.867 if eqi is None:
2025-07-01 05:47:28.877 eqi, eqj = i, j
2025-07-01 05:47:28.889 continue
2025-07-01 05:47:28.902 cruncher.set_seq1(ai)
2025-07-01 05:47:28.913 # computing similarity is expensive, so use the quick
2025-07-01 05:47:28.921 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:28.928 # compares by a factor of 3.
2025-07-01 05:47:28.935 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:28.942 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:28.950 # of the computation is cached by cruncher
2025-07-01 05:47:28.961 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:28.970 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:28.981 cruncher.ratio() > best_ratio:
2025-07-01 05:47:28.991 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:28.999 if best_ratio < cutoff:
2025-07-01 05:47:29.012 # no non-identical "pretty close" pair
2025-07-01 05:47:29.023 if eqi is None:
2025-07-01 05:47:29.035 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:29.044 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:29.052 return
2025-07-01 05:47:29.059 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:29.065 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:29.070 else:
2025-07-01 05:47:29.076 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:29.082 eqi = None
2025-07-01 05:47:29.087
2025-07-01 05:47:29.095 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:29.106 # identical
2025-07-01 05:47:29.114
2025-07-01 05:47:29.121 # pump out diffs from before the synch point
2025-07-01 05:47:29.137 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:29.146
2025-07-01 05:47:29.155 # do intraline marking on the synch pair
2025-07-01 05:47:29.168 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:29.179 if eqi is None:
2025-07-01 05:47:29.188 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:29.195 atags = btags = ""
2025-07-01 05:47:29.203 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:29.211 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:29.221 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:29.230 if tag == 'replace':
2025-07-01 05:47:29.237 atags += '^' * la
2025-07-01 05:47:29.243 btags += '^' * lb
2025-07-01 05:47:29.249 elif tag == 'delete':
2025-07-01 05:47:29.257 atags += '-' * la
2025-07-01 05:47:29.268 elif tag == 'insert':
2025-07-01 05:47:29.276 btags += '+' * lb
2025-07-01 05:47:29.283 elif tag == 'equal':
2025-07-01 05:47:29.292 atags += ' ' * la
2025-07-01 05:47:29.300 btags += ' ' * lb
2025-07-01 05:47:29.311 else:
2025-07-01 05:47:29.319 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:29.328 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:29.336 else:
2025-07-01 05:47:29.349 # the synch pair is identical
2025-07-01 05:47:29.359 yield ' ' + aelt
2025-07-01 05:47:29.368
2025-07-01 05:47:29.379 # pump out diffs from after the synch point
2025-07-01 05:47:29.391 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:29.401
2025-07-01 05:47:29.412 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:29.425 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:29.435
2025-07-01 05:47:29.445 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:29.459 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:29.467 alo = 234, ahi = 1101
2025-07-01 05:47:29.476 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:29.482 blo = 234, bhi = 1101
2025-07-01 05:47:29.488
2025-07-01 05:47:29.494 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:29.499 g = []
2025-07-01 05:47:29.507 if alo < ahi:
2025-07-01 05:47:29.519 if blo < bhi:
2025-07-01 05:47:29.528 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:29.536 else:
2025-07-01 05:47:29.544 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:29.556 elif blo < bhi:
2025-07-01 05:47:29.565 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:29.575
2025-07-01 05:47:29.586 > yield from g
2025-07-01 05:47:29.595
2025-07-01 05:47:29.608 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:29.620 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:29.632
2025-07-01 05:47:29.640 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:29.651 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:29.664 alo = 234, ahi = 1101
2025-07-01 05:47:29.676 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:29.684 blo = 234, bhi = 1101
2025-07-01 05:47:29.691
2025-07-01 05:47:29.699 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:29.706 r"""
2025-07-01 05:47:29.713 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:29.725 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:29.734 synch point, and intraline difference marking is done on the
2025-07-01 05:47:29.743 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:29.750
2025-07-01 05:47:29.757 Example:
2025-07-01 05:47:29.767
2025-07-01 05:47:29.774 >>> d = Differ()
2025-07-01 05:47:29.784 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:29.795 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:29.803 >>> print(''.join(results), end="")
2025-07-01 05:47:29.809 - abcDefghiJkl
2025-07-01 05:47:29.821 + abcdefGhijkl
2025-07-01 05:47:29.833 """
2025-07-01 05:47:29.839
2025-07-01 05:47:29.846 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:29.854 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:29.863 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:29.873 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:29.881 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:29.888
2025-07-01 05:47:29.897 # search for the pair that matches best without being identical
2025-07-01 05:47:29.904 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:29.914 # on junk -- unless we have to)
2025-07-01 05:47:29.926 for j in range(blo, bhi):
2025-07-01 05:47:29.937 bj = b[j]
2025-07-01 05:47:29.951 cruncher.set_seq2(bj)
2025-07-01 05:47:29.964 for i in range(alo, ahi):
2025-07-01 05:47:29.974 ai = a[i]
2025-07-01 05:47:29.983 if ai == bj:
2025-07-01 05:47:29.995 if eqi is None:
2025-07-01 05:47:30.004 eqi, eqj = i, j
2025-07-01 05:47:30.012 continue
2025-07-01 05:47:30.019 cruncher.set_seq1(ai)
2025-07-01 05:47:30.026 # computing similarity is expensive, so use the quick
2025-07-01 05:47:30.036 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:30.043 # compares by a factor of 3.
2025-07-01 05:47:30.050 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:30.061 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:30.071 # of the computation is cached by cruncher
2025-07-01 05:47:30.080 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:30.091 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:30.100 cruncher.ratio() > best_ratio:
2025-07-01 05:47:30.107 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:30.114 if best_ratio < cutoff:
2025-07-01 05:47:30.120 # no non-identical "pretty close" pair
2025-07-01 05:47:30.127 if eqi is None:
2025-07-01 05:47:30.138 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:30.146 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:30.154 return
2025-07-01 05:47:30.163 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:30.170 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:30.180 else:
2025-07-01 05:47:30.190 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:30.198 eqi = None
2025-07-01 05:47:30.205
2025-07-01 05:47:30.216 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:30.226 # identical
2025-07-01 05:47:30.236
2025-07-01 05:47:30.248 # pump out diffs from before the synch point
2025-07-01 05:47:30.260 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:30.270
2025-07-01 05:47:30.282 # do intraline marking on the synch pair
2025-07-01 05:47:30.293 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:30.301 if eqi is None:
2025-07-01 05:47:30.311 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:30.321 atags = btags = ""
2025-07-01 05:47:30.330 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:30.341 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:30.351 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:30.359 if tag == 'replace':
2025-07-01 05:47:30.371 atags += '^' * la
2025-07-01 05:47:30.383 btags += '^' * lb
2025-07-01 05:47:30.394 elif tag == 'delete':
2025-07-01 05:47:30.406 atags += '-' * la
2025-07-01 05:47:30.417 elif tag == 'insert':
2025-07-01 05:47:30.425 btags += '+' * lb
2025-07-01 05:47:30.433 elif tag == 'equal':
2025-07-01 05:47:30.446 atags += ' ' * la
2025-07-01 05:47:30.456 btags += ' ' * lb
2025-07-01 05:47:30.463 else:
2025-07-01 05:47:30.470 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:30.477 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:30.485 else:
2025-07-01 05:47:30.496 # the synch pair is identical
2025-07-01 05:47:30.505 yield ' ' + aelt
2025-07-01 05:47:30.513
2025-07-01 05:47:30.519 # pump out diffs from after the synch point
2025-07-01 05:47:30.526 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:30.531
2025-07-01 05:47:30.539 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:30.548 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:30.556
2025-07-01 05:47:30.564 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:30.572 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:30.578 alo = 235, ahi = 1101
2025-07-01 05:47:30.587 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:30.596 blo = 235, bhi = 1101
2025-07-01 05:47:30.606
2025-07-01 05:47:30.614 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:30.621 g = []
2025-07-01 05:47:30.627 if alo < ahi:
2025-07-01 05:47:30.636 if blo < bhi:
2025-07-01 05:47:30.643 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:30.650 else:
2025-07-01 05:47:30.657 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:30.663 elif blo < bhi:
2025-07-01 05:47:30.669 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:30.675
2025-07-01 05:47:30.682 > yield from g
2025-07-01 05:47:30.694
2025-07-01 05:47:30.702 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:30.715 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:30.723
2025-07-01 05:47:30.732 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:30.741 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:30.748 alo = 235, ahi = 1101
2025-07-01 05:47:30.756 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:30.763 blo = 235, bhi = 1101
2025-07-01 05:47:30.770
2025-07-01 05:47:30.777 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:30.783 r"""
2025-07-01 05:47:30.789 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:30.795 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:30.802 synch point, and intraline difference marking is done on the
2025-07-01 05:47:30.808 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:30.814
2025-07-01 05:47:30.819 Example:
2025-07-01 05:47:30.832
2025-07-01 05:47:30.842 >>> d = Differ()
2025-07-01 05:47:30.854 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:30.867 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:30.877 >>> print(''.join(results), end="")
2025-07-01 05:47:30.885 - abcDefghiJkl
2025-07-01 05:47:30.902 + abcdefGhijkl
2025-07-01 05:47:30.914 """
2025-07-01 05:47:30.919
2025-07-01 05:47:30.925 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:30.930 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:30.940 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:30.951 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:30.960 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:30.966
2025-07-01 05:47:30.973 # search for the pair that matches best without being identical
2025-07-01 05:47:30.979 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:30.985 # on junk -- unless we have to)
2025-07-01 05:47:30.991 for j in range(blo, bhi):
2025-07-01 05:47:30.998 bj = b[j]
2025-07-01 05:47:31.009 cruncher.set_seq2(bj)
2025-07-01 05:47:31.019 for i in range(alo, ahi):
2025-07-01 05:47:31.029 ai = a[i]
2025-07-01 05:47:31.037 if ai == bj:
2025-07-01 05:47:31.044 if eqi is None:
2025-07-01 05:47:31.051 eqi, eqj = i, j
2025-07-01 05:47:31.060 continue
2025-07-01 05:47:31.070 cruncher.set_seq1(ai)
2025-07-01 05:47:31.078 # computing similarity is expensive, so use the quick
2025-07-01 05:47:31.086 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:31.094 # compares by a factor of 3.
2025-07-01 05:47:31.106 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:31.115 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:31.122 # of the computation is cached by cruncher
2025-07-01 05:47:31.129 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:31.136 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:31.143 cruncher.ratio() > best_ratio:
2025-07-01 05:47:31.155 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:31.165 if best_ratio < cutoff:
2025-07-01 05:47:31.172 # no non-identical "pretty close" pair
2025-07-01 05:47:31.179 if eqi is None:
2025-07-01 05:47:31.185 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:31.191 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:31.203 return
2025-07-01 05:47:31.212 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:31.221 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:31.228 else:
2025-07-01 05:47:31.242 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:31.254 eqi = None
2025-07-01 05:47:31.264
2025-07-01 05:47:31.273 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:31.280 # identical
2025-07-01 05:47:31.286
2025-07-01 05:47:31.292 # pump out diffs from before the synch point
2025-07-01 05:47:31.300 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:31.307
2025-07-01 05:47:31.315 # do intraline marking on the synch pair
2025-07-01 05:47:31.327 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:31.337 if eqi is None:
2025-07-01 05:47:31.350 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:31.360 atags = btags = ""
2025-07-01 05:47:31.373 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:31.383 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:31.394 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:31.403 if tag == 'replace':
2025-07-01 05:47:31.413 atags += '^' * la
2025-07-01 05:47:31.424 btags += '^' * lb
2025-07-01 05:47:31.434 elif tag == 'delete':
2025-07-01 05:47:31.446 atags += '-' * la
2025-07-01 05:47:31.454 elif tag == 'insert':
2025-07-01 05:47:31.463 btags += '+' * lb
2025-07-01 05:47:31.472 elif tag == 'equal':
2025-07-01 05:47:31.478 atags += ' ' * la
2025-07-01 05:47:31.490 btags += ' ' * lb
2025-07-01 05:47:31.499 else:
2025-07-01 05:47:31.508 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:31.515 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:31.522 else:
2025-07-01 05:47:31.528 # the synch pair is identical
2025-07-01 05:47:31.534 yield ' ' + aelt
2025-07-01 05:47:31.538
2025-07-01 05:47:31.543 # pump out diffs from after the synch point
2025-07-01 05:47:31.548 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:31.552
2025-07-01 05:47:31.557 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:31.563 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:31.568
2025-07-01 05:47:31.573 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:31.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:31.588 alo = 236, ahi = 1101
2025-07-01 05:47:31.594 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:31.600 blo = 236, bhi = 1101
2025-07-01 05:47:31.605
2025-07-01 05:47:31.613 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:31.622 g = []
2025-07-01 05:47:31.632 if alo < ahi:
2025-07-01 05:47:31.641 if blo < bhi:
2025-07-01 05:47:31.648 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:31.654 else:
2025-07-01 05:47:31.666 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:31.674 elif blo < bhi:
2025-07-01 05:47:31.682 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:31.690
2025-07-01 05:47:31.698 > yield from g
2025-07-01 05:47:31.705
2025-07-01 05:47:31.712 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:31.719 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:31.724
2025-07-01 05:47:31.730 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:31.738 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:31.743 alo = 236, ahi = 1101
2025-07-01 05:47:31.757 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:31.764 blo = 236, bhi = 1101
2025-07-01 05:47:31.770
2025-07-01 05:47:31.777 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:31.788 r"""
2025-07-01 05:47:31.799 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:31.809 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:31.815 synch point, and intraline difference marking is done on the
2025-07-01 05:47:31.822 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:31.828
2025-07-01 05:47:31.835 Example:
2025-07-01 05:47:31.842
2025-07-01 05:47:31.854 >>> d = Differ()
2025-07-01 05:47:31.866 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:31.878 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:31.887 >>> print(''.join(results), end="")
2025-07-01 05:47:31.895 - abcDefghiJkl
2025-07-01 05:47:31.912 + abcdefGhijkl
2025-07-01 05:47:31.932 """
2025-07-01 05:47:31.939
2025-07-01 05:47:31.946 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:31.952 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:31.958 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:31.964 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:31.972 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:31.979
2025-07-01 05:47:31.986 # search for the pair that matches best without being identical
2025-07-01 05:47:31.995 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:32.002 # on junk -- unless we have to)
2025-07-01 05:47:32.011 for j in range(blo, bhi):
2025-07-01 05:47:32.022 bj = b[j]
2025-07-01 05:47:32.030 cruncher.set_seq2(bj)
2025-07-01 05:47:32.037 for i in range(alo, ahi):
2025-07-01 05:47:32.043 ai = a[i]
2025-07-01 05:47:32.049 if ai == bj:
2025-07-01 05:47:32.059 if eqi is None:
2025-07-01 05:47:32.067 eqi, eqj = i, j
2025-07-01 05:47:32.074 continue
2025-07-01 05:47:32.080 cruncher.set_seq1(ai)
2025-07-01 05:47:32.088 # computing similarity is expensive, so use the quick
2025-07-01 05:47:32.096 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:32.103 # compares by a factor of 3.
2025-07-01 05:47:32.110 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:32.118 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:32.129 # of the computation is cached by cruncher
2025-07-01 05:47:32.141 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:32.149 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:32.156 cruncher.ratio() > best_ratio:
2025-07-01 05:47:32.162 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:32.168 if best_ratio < cutoff:
2025-07-01 05:47:32.174 # no non-identical "pretty close" pair
2025-07-01 05:47:32.180 if eqi is None:
2025-07-01 05:47:32.186 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:32.193 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:32.200 return
2025-07-01 05:47:32.208 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:32.215 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:32.222 else:
2025-07-01 05:47:32.230 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:32.241 eqi = None
2025-07-01 05:47:32.251
2025-07-01 05:47:32.259 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:32.266 # identical
2025-07-01 05:47:32.276
2025-07-01 05:47:32.287 # pump out diffs from before the synch point
2025-07-01 05:47:32.296 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:32.304
2025-07-01 05:47:32.312 # do intraline marking on the synch pair
2025-07-01 05:47:32.319 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:32.326 if eqi is None:
2025-07-01 05:47:32.332 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:32.339 atags = btags = ""
2025-07-01 05:47:32.346 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:32.355 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:32.366 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:32.372 if tag == 'replace':
2025-07-01 05:47:32.379 atags += '^' * la
2025-07-01 05:47:32.385 btags += '^' * lb
2025-07-01 05:47:32.391 elif tag == 'delete':
2025-07-01 05:47:32.399 atags += '-' * la
2025-07-01 05:47:32.410 elif tag == 'insert':
2025-07-01 05:47:32.418 btags += '+' * lb
2025-07-01 05:47:32.424 elif tag == 'equal':
2025-07-01 05:47:32.431 atags += ' ' * la
2025-07-01 05:47:32.437 btags += ' ' * lb
2025-07-01 05:47:32.443 else:
2025-07-01 05:47:32.448 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:32.457 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:32.464 else:
2025-07-01 05:47:32.471 # the synch pair is identical
2025-07-01 05:47:32.478 yield ' ' + aelt
2025-07-01 05:47:32.490
2025-07-01 05:47:32.497 # pump out diffs from after the synch point
2025-07-01 05:47:32.504 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:32.510
2025-07-01 05:47:32.518 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:32.526 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:32.533
2025-07-01 05:47:32.541 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:32.549 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:32.562 alo = 237, ahi = 1101
2025-07-01 05:47:32.575 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:32.583 blo = 237, bhi = 1101
2025-07-01 05:47:32.589
2025-07-01 05:47:32.597 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:32.610 g = []
2025-07-01 05:47:32.617 if alo < ahi:
2025-07-01 05:47:32.623 if blo < bhi:
2025-07-01 05:47:32.630 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:32.640 else:
2025-07-01 05:47:32.650 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:32.658 elif blo < bhi:
2025-07-01 05:47:32.665 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:32.676
2025-07-01 05:47:32.687 > yield from g
2025-07-01 05:47:32.694
2025-07-01 05:47:32.702 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:32.710 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:32.721
2025-07-01 05:47:32.732 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:32.741 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:32.752 alo = 237, ahi = 1101
2025-07-01 05:47:32.763 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:32.773 blo = 237, bhi = 1101
2025-07-01 05:47:32.782
2025-07-01 05:47:32.790 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:32.801 r"""
2025-07-01 05:47:32.810 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:32.820 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:32.830 synch point, and intraline difference marking is done on the
2025-07-01 05:47:32.839 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:32.847
2025-07-01 05:47:32.853 Example:
2025-07-01 05:47:32.859
2025-07-01 05:47:32.865 >>> d = Differ()
2025-07-01 05:47:32.876 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:32.886 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:32.895 >>> print(''.join(results), end="")
2025-07-01 05:47:32.906 - abcDefghiJkl
2025-07-01 05:47:32.925 + abcdefGhijkl
2025-07-01 05:47:32.938 """
2025-07-01 05:47:32.948
2025-07-01 05:47:32.958 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:32.966 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:32.976 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:32.985 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:32.992 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:32.999
2025-07-01 05:47:33.007 # search for the pair that matches best without being identical
2025-07-01 05:47:33.019 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:33.028 # on junk -- unless we have to)
2025-07-01 05:47:33.036 for j in range(blo, bhi):
2025-07-01 05:47:33.043 bj = b[j]
2025-07-01 05:47:33.049 cruncher.set_seq2(bj)
2025-07-01 05:47:33.054 for i in range(alo, ahi):
2025-07-01 05:47:33.060 ai = a[i]
2025-07-01 05:47:33.065 if ai == bj:
2025-07-01 05:47:33.071 if eqi is None:
2025-07-01 05:47:33.076 eqi, eqj = i, j
2025-07-01 05:47:33.081 continue
2025-07-01 05:47:33.090 cruncher.set_seq1(ai)
2025-07-01 05:47:33.097 # computing similarity is expensive, so use the quick
2025-07-01 05:47:33.103 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:33.109 # compares by a factor of 3.
2025-07-01 05:47:33.115 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:33.121 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:33.126 # of the computation is cached by cruncher
2025-07-01 05:47:33.132 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:33.138 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:33.148 cruncher.ratio() > best_ratio:
2025-07-01 05:47:33.157 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:33.164 if best_ratio < cutoff:
2025-07-01 05:47:33.171 # no non-identical "pretty close" pair
2025-07-01 05:47:33.177 if eqi is None:
2025-07-01 05:47:33.182 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:33.188 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:33.200 return
2025-07-01 05:47:33.210 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:33.222 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:33.231 else:
2025-07-01 05:47:33.239 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:33.246 eqi = None
2025-07-01 05:47:33.252
2025-07-01 05:47:33.258 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:33.264 # identical
2025-07-01 05:47:33.270
2025-07-01 05:47:33.275 # pump out diffs from before the synch point
2025-07-01 05:47:33.281 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:33.287
2025-07-01 05:47:33.292 # do intraline marking on the synch pair
2025-07-01 05:47:33.299 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:33.306 if eqi is None:
2025-07-01 05:47:33.316 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:33.325 atags = btags = ""
2025-07-01 05:47:33.338 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:33.346 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:33.359 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:33.371 if tag == 'replace':
2025-07-01 05:47:33.383 atags += '^' * la
2025-07-01 05:47:33.395 btags += '^' * lb
2025-07-01 05:47:33.403 elif tag == 'delete':
2025-07-01 05:47:33.411 atags += '-' * la
2025-07-01 05:47:33.418 elif tag == 'insert':
2025-07-01 05:47:33.431 btags += '+' * lb
2025-07-01 05:47:33.442 elif tag == 'equal':
2025-07-01 05:47:33.453 atags += ' ' * la
2025-07-01 05:47:33.462 btags += ' ' * lb
2025-07-01 05:47:33.472 else:
2025-07-01 05:47:33.482 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:33.490 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:33.500 else:
2025-07-01 05:47:33.509 # the synch pair is identical
2025-07-01 05:47:33.519 yield ' ' + aelt
2025-07-01 05:47:33.528
2025-07-01 05:47:33.534 # pump out diffs from after the synch point
2025-07-01 05:47:33.540 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:33.549
2025-07-01 05:47:33.557 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:33.567 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:33.575
2025-07-01 05:47:33.582 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:33.588 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:33.593 alo = 238, ahi = 1101
2025-07-01 05:47:33.600 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:33.605 blo = 238, bhi = 1101
2025-07-01 05:47:33.611
2025-07-01 05:47:33.617 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:33.623 g = []
2025-07-01 05:47:33.629 if alo < ahi:
2025-07-01 05:47:33.635 if blo < bhi:
2025-07-01 05:47:33.640 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:33.645 else:
2025-07-01 05:47:33.650 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:33.655 elif blo < bhi:
2025-07-01 05:47:33.660 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:33.666
2025-07-01 05:47:33.671 > yield from g
2025-07-01 05:47:33.678
2025-07-01 05:47:33.689 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:33.697 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:33.705
2025-07-01 05:47:33.711 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:33.718 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:33.724 alo = 238, ahi = 1101
2025-07-01 05:47:33.730 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:33.736 blo = 238, bhi = 1101
2025-07-01 05:47:33.742
2025-07-01 05:47:33.748 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:33.754 r"""
2025-07-01 05:47:33.761 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:33.772 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:33.784 synch point, and intraline difference marking is done on the
2025-07-01 05:47:33.793 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:33.806
2025-07-01 05:47:33.813 Example:
2025-07-01 05:47:33.819
2025-07-01 05:47:33.832 >>> d = Differ()
2025-07-01 05:47:33.844 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:33.853 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:33.865 >>> print(''.join(results), end="")
2025-07-01 05:47:33.876 - abcDefghiJkl
2025-07-01 05:47:33.895 + abcdefGhijkl
2025-07-01 05:47:33.914 """
2025-07-01 05:47:33.924
2025-07-01 05:47:33.933 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:33.940 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:33.947 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:33.960 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:33.970 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:33.977
2025-07-01 05:47:33.985 # search for the pair that matches best without being identical
2025-07-01 05:47:33.992 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:33.999 # on junk -- unless we have to)
2025-07-01 05:47:34.007 for j in range(blo, bhi):
2025-07-01 05:47:34.019 bj = b[j]
2025-07-01 05:47:34.030 cruncher.set_seq2(bj)
2025-07-01 05:47:34.038 for i in range(alo, ahi):
2025-07-01 05:47:34.043 ai = a[i]
2025-07-01 05:47:34.050 if ai == bj:
2025-07-01 05:47:34.056 if eqi is None:
2025-07-01 05:47:34.063 eqi, eqj = i, j
2025-07-01 05:47:34.071 continue
2025-07-01 05:47:34.083 cruncher.set_seq1(ai)
2025-07-01 05:47:34.091 # computing similarity is expensive, so use the quick
2025-07-01 05:47:34.100 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:34.111 # compares by a factor of 3.
2025-07-01 05:47:34.124 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:34.132 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:34.139 # of the computation is cached by cruncher
2025-07-01 05:47:34.146 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:34.156 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:34.166 cruncher.ratio() > best_ratio:
2025-07-01 05:47:34.175 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:34.187 if best_ratio < cutoff:
2025-07-01 05:47:34.197 # no non-identical "pretty close" pair
2025-07-01 05:47:34.209 if eqi is None:
2025-07-01 05:47:34.220 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:34.228 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:34.234 return
2025-07-01 05:47:34.245 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:34.256 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:34.265 else:
2025-07-01 05:47:34.272 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:34.278 eqi = None
2025-07-01 05:47:34.283
2025-07-01 05:47:34.296 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:34.305 # identical
2025-07-01 05:47:34.312
2025-07-01 05:47:34.318 # pump out diffs from before the synch point
2025-07-01 05:47:34.324 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:34.329
2025-07-01 05:47:34.334 # do intraline marking on the synch pair
2025-07-01 05:47:34.339 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:34.345 if eqi is None:
2025-07-01 05:47:34.351 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:34.357 atags = btags = ""
2025-07-01 05:47:34.368 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:34.382 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:34.397 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:34.405 if tag == 'replace':
2025-07-01 05:47:34.412 atags += '^' * la
2025-07-01 05:47:34.419 btags += '^' * lb
2025-07-01 05:47:34.425 elif tag == 'delete':
2025-07-01 05:47:34.430 atags += '-' * la
2025-07-01 05:47:34.435 elif tag == 'insert':
2025-07-01 05:47:34.441 btags += '+' * lb
2025-07-01 05:47:34.447 elif tag == 'equal':
2025-07-01 05:47:34.454 atags += ' ' * la
2025-07-01 05:47:34.464 btags += ' ' * lb
2025-07-01 05:47:34.473 else:
2025-07-01 05:47:34.486 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:34.496 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:34.504 else:
2025-07-01 05:47:34.511 # the synch pair is identical
2025-07-01 05:47:34.520 yield ' ' + aelt
2025-07-01 05:47:34.532
2025-07-01 05:47:34.541 # pump out diffs from after the synch point
2025-07-01 05:47:34.548 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:34.555
2025-07-01 05:47:34.562 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:34.575 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:34.585
2025-07-01 05:47:34.592 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:34.598 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:34.603 alo = 239, ahi = 1101
2025-07-01 05:47:34.609 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:34.613 blo = 239, bhi = 1101
2025-07-01 05:47:34.618
2025-07-01 05:47:34.622 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:34.627 g = []
2025-07-01 05:47:34.632 if alo < ahi:
2025-07-01 05:47:34.639 if blo < bhi:
2025-07-01 05:47:34.647 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:34.659 else:
2025-07-01 05:47:34.667 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:34.673 elif blo < bhi:
2025-07-01 05:47:34.679 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:34.685
2025-07-01 05:47:34.691 > yield from g
2025-07-01 05:47:34.698
2025-07-01 05:47:34.704 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:34.710 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:34.716
2025-07-01 05:47:34.722 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:34.730 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:34.737 alo = 239, ahi = 1101
2025-07-01 05:47:34.744 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:34.751 blo = 239, bhi = 1101
2025-07-01 05:47:34.762
2025-07-01 05:47:34.768 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:34.773 r"""
2025-07-01 05:47:34.782 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:34.789 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:34.796 synch point, and intraline difference marking is done on the
2025-07-01 05:47:34.803 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:34.811
2025-07-01 05:47:34.822 Example:
2025-07-01 05:47:34.830
2025-07-01 05:47:34.836 >>> d = Differ()
2025-07-01 05:47:34.842 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:34.847 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:34.858 >>> print(''.join(results), end="")
2025-07-01 05:47:34.865 - abcDefghiJkl
2025-07-01 05:47:34.876 + abcdefGhijkl
2025-07-01 05:47:34.892 """
2025-07-01 05:47:34.902
2025-07-01 05:47:34.909 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:34.916 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:34.922 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:34.926 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:34.932 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:34.937
2025-07-01 05:47:34.944 # search for the pair that matches best without being identical
2025-07-01 05:47:34.950 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:34.956 # on junk -- unless we have to)
2025-07-01 05:47:34.963 for j in range(blo, bhi):
2025-07-01 05:47:34.969 bj = b[j]
2025-07-01 05:47:34.980 cruncher.set_seq2(bj)
2025-07-01 05:47:34.990 for i in range(alo, ahi):
2025-07-01 05:47:34.997 ai = a[i]
2025-07-01 05:47:35.004 if ai == bj:
2025-07-01 05:47:35.009 if eqi is None:
2025-07-01 05:47:35.015 eqi, eqj = i, j
2025-07-01 05:47:35.020 continue
2025-07-01 05:47:35.028 cruncher.set_seq1(ai)
2025-07-01 05:47:35.036 # computing similarity is expensive, so use the quick
2025-07-01 05:47:35.043 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:35.050 # compares by a factor of 3.
2025-07-01 05:47:35.057 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:35.064 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:35.071 # of the computation is cached by cruncher
2025-07-01 05:47:35.077 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:35.087 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:35.093 cruncher.ratio() > best_ratio:
2025-07-01 05:47:35.100 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:35.107 if best_ratio < cutoff:
2025-07-01 05:47:35.114 # no non-identical "pretty close" pair
2025-07-01 05:47:35.127 if eqi is None:
2025-07-01 05:47:35.137 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:35.145 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:35.152 return
2025-07-01 05:47:35.158 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:35.164 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:35.170 else:
2025-07-01 05:47:35.176 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:35.182 eqi = None
2025-07-01 05:47:35.188
2025-07-01 05:47:35.195 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:35.201 # identical
2025-07-01 05:47:35.206
2025-07-01 05:47:35.216 # pump out diffs from before the synch point
2025-07-01 05:47:35.225 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:35.230
2025-07-01 05:47:35.236 # do intraline marking on the synch pair
2025-07-01 05:47:35.243 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:35.249 if eqi is None:
2025-07-01 05:47:35.255 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:35.262 atags = btags = ""
2025-07-01 05:47:35.269 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:35.276 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:35.283 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:35.290 if tag == 'replace':
2025-07-01 05:47:35.297 atags += '^' * la
2025-07-01 05:47:35.304 btags += '^' * lb
2025-07-01 05:47:35.310 elif tag == 'delete':
2025-07-01 05:47:35.316 atags += '-' * la
2025-07-01 05:47:35.323 elif tag == 'insert':
2025-07-01 05:47:35.329 btags += '+' * lb
2025-07-01 05:47:35.336 elif tag == 'equal':
2025-07-01 05:47:35.345 atags += ' ' * la
2025-07-01 05:47:35.354 btags += ' ' * lb
2025-07-01 05:47:35.361 else:
2025-07-01 05:47:35.368 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:35.374 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:35.383 else:
2025-07-01 05:47:35.394 # the synch pair is identical
2025-07-01 05:47:35.402 yield ' ' + aelt
2025-07-01 05:47:35.408
2025-07-01 05:47:35.414 # pump out diffs from after the synch point
2025-07-01 05:47:35.420 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:35.426
2025-07-01 05:47:35.432 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:35.439 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:35.446
2025-07-01 05:47:35.453 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:35.461 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:35.467 alo = 240, ahi = 1101
2025-07-01 05:47:35.473 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:35.477 blo = 240, bhi = 1101
2025-07-01 05:47:35.482
2025-07-01 05:47:35.488 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:35.494 g = []
2025-07-01 05:47:35.500 if alo < ahi:
2025-07-01 05:47:35.506 if blo < bhi:
2025-07-01 05:47:35.514 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:35.520 else:
2025-07-01 05:47:35.527 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:35.534 elif blo < bhi:
2025-07-01 05:47:35.540 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:35.546
2025-07-01 05:47:35.552 > yield from g
2025-07-01 05:47:35.557
2025-07-01 05:47:35.563 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:35.568 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:35.573
2025-07-01 05:47:35.578 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:35.587 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:35.593 alo = 240, ahi = 1101
2025-07-01 05:47:35.599 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:35.605 blo = 240, bhi = 1101
2025-07-01 05:47:35.611
2025-07-01 05:47:35.618 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:35.625 r"""
2025-07-01 05:47:35.631 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:35.638 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:35.644 synch point, and intraline difference marking is done on the
2025-07-01 05:47:35.651 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:35.658
2025-07-01 05:47:35.667 Example:
2025-07-01 05:47:35.677
2025-07-01 05:47:35.685 >>> d = Differ()
2025-07-01 05:47:35.692 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:35.699 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:35.708 >>> print(''.join(results), end="")
2025-07-01 05:47:35.718 - abcDefghiJkl
2025-07-01 05:47:35.733 + abcdefGhijkl
2025-07-01 05:47:35.743 """
2025-07-01 05:47:35.747
2025-07-01 05:47:35.752 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:35.757 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:35.763 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:35.768 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:35.774 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:35.780
2025-07-01 05:47:35.786 # search for the pair that matches best without being identical
2025-07-01 05:47:35.791 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:35.796 # on junk -- unless we have to)
2025-07-01 05:47:35.801 for j in range(blo, bhi):
2025-07-01 05:47:35.805 bj = b[j]
2025-07-01 05:47:35.809 cruncher.set_seq2(bj)
2025-07-01 05:47:35.815 for i in range(alo, ahi):
2025-07-01 05:47:35.825 ai = a[i]
2025-07-01 05:47:35.833 if ai == bj:
2025-07-01 05:47:35.840 if eqi is None:
2025-07-01 05:47:35.847 eqi, eqj = i, j
2025-07-01 05:47:35.855 continue
2025-07-01 05:47:35.866 cruncher.set_seq1(ai)
2025-07-01 05:47:35.874 # computing similarity is expensive, so use the quick
2025-07-01 05:47:35.880 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:35.886 # compares by a factor of 3.
2025-07-01 05:47:35.891 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:35.896 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:35.900 # of the computation is cached by cruncher
2025-07-01 05:47:35.905 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:35.910 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:35.914 cruncher.ratio() > best_ratio:
2025-07-01 05:47:35.919 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:35.923 if best_ratio < cutoff:
2025-07-01 05:47:35.928 # no non-identical "pretty close" pair
2025-07-01 05:47:35.934 if eqi is None:
2025-07-01 05:47:35.939 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:35.945 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:35.951 return
2025-07-01 05:47:35.957 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:35.963 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:35.970 else:
2025-07-01 05:47:35.979 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:35.987 eqi = None
2025-07-01 05:47:35.993
2025-07-01 05:47:35.999 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:36.005 # identical
2025-07-01 05:47:36.012
2025-07-01 05:47:36.019 # pump out diffs from before the synch point
2025-07-01 05:47:36.027 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:36.035
2025-07-01 05:47:36.046 # do intraline marking on the synch pair
2025-07-01 05:47:36.054 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:36.060 if eqi is None:
2025-07-01 05:47:36.073 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:36.082 atags = btags = ""
2025-07-01 05:47:36.090 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:36.099 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:36.110 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:36.122 if tag == 'replace':
2025-07-01 05:47:36.133 atags += '^' * la
2025-07-01 05:47:36.143 btags += '^' * lb
2025-07-01 05:47:36.154 elif tag == 'delete':
2025-07-01 05:47:36.164 atags += '-' * la
2025-07-01 05:47:36.171 elif tag == 'insert':
2025-07-01 05:47:36.178 btags += '+' * lb
2025-07-01 05:47:36.184 elif tag == 'equal':
2025-07-01 05:47:36.191 atags += ' ' * la
2025-07-01 05:47:36.203 btags += ' ' * lb
2025-07-01 05:47:36.213 else:
2025-07-01 05:47:36.222 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:36.235 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:36.245 else:
2025-07-01 05:47:36.256 # the synch pair is identical
2025-07-01 05:47:36.265 yield ' ' + aelt
2025-07-01 05:47:36.273
2025-07-01 05:47:36.279 # pump out diffs from after the synch point
2025-07-01 05:47:36.285 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:36.291
2025-07-01 05:47:36.301 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:36.316 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:36.328
2025-07-01 05:47:36.340 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:36.354 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:36.363 alo = 241, ahi = 1101
2025-07-01 05:47:36.374 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:36.379 blo = 241, bhi = 1101
2025-07-01 05:47:36.391
2025-07-01 05:47:36.400 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:36.407 g = []
2025-07-01 05:47:36.412 if alo < ahi:
2025-07-01 05:47:36.417 if blo < bhi:
2025-07-01 05:47:36.423 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:36.428 else:
2025-07-01 05:47:36.434 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:36.440 elif blo < bhi:
2025-07-01 05:47:36.446 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:36.452
2025-07-01 05:47:36.458 > yield from g
2025-07-01 05:47:36.465
2025-07-01 05:47:36.471 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:36.477 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:36.482
2025-07-01 05:47:36.489 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:36.499 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:36.510 alo = 241, ahi = 1101
2025-07-01 05:47:36.518 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:36.524 blo = 241, bhi = 1101
2025-07-01 05:47:36.530
2025-07-01 05:47:36.537 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:36.543 r"""
2025-07-01 05:47:36.551 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:36.558 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:36.565 synch point, and intraline difference marking is done on the
2025-07-01 05:47:36.574 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:36.582
2025-07-01 05:47:36.589 Example:
2025-07-01 05:47:36.596
2025-07-01 05:47:36.603 >>> d = Differ()
2025-07-01 05:47:36.614 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:36.622 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:36.629 >>> print(''.join(results), end="")
2025-07-01 05:47:36.635 - abcDefghiJkl
2025-07-01 05:47:36.645 + abcdefGhijkl
2025-07-01 05:47:36.656 """
2025-07-01 05:47:36.662
2025-07-01 05:47:36.672 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:36.681 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:36.689 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:36.701 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:36.714 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:36.726
2025-07-01 05:47:36.738 # search for the pair that matches best without being identical
2025-07-01 05:47:36.749 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:36.756 # on junk -- unless we have to)
2025-07-01 05:47:36.762 for j in range(blo, bhi):
2025-07-01 05:47:36.768 bj = b[j]
2025-07-01 05:47:36.775 cruncher.set_seq2(bj)
2025-07-01 05:47:36.783 for i in range(alo, ahi):
2025-07-01 05:47:36.794 ai = a[i]
2025-07-01 05:47:36.806 if ai == bj:
2025-07-01 05:47:36.818 if eqi is None:
2025-07-01 05:47:36.829 eqi, eqj = i, j
2025-07-01 05:47:36.839 continue
2025-07-01 05:47:36.850 cruncher.set_seq1(ai)
2025-07-01 05:47:36.858 # computing similarity is expensive, so use the quick
2025-07-01 05:47:36.866 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:36.874 # compares by a factor of 3.
2025-07-01 05:47:36.884 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:36.893 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:36.901 # of the computation is cached by cruncher
2025-07-01 05:47:36.908 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:36.915 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:36.921 cruncher.ratio() > best_ratio:
2025-07-01 05:47:36.926 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:36.933 if best_ratio < cutoff:
2025-07-01 05:47:36.939 # no non-identical "pretty close" pair
2025-07-01 05:47:36.946 if eqi is None:
2025-07-01 05:47:36.958 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:36.968 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:36.976 return
2025-07-01 05:47:36.984 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:36.991 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:36.998 else:
2025-07-01 05:47:37.011 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:37.021 eqi = None
2025-07-01 05:47:37.028
2025-07-01 05:47:37.034 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:37.045 # identical
2025-07-01 05:47:37.056
2025-07-01 05:47:37.069 # pump out diffs from before the synch point
2025-07-01 05:47:37.081 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:37.093
2025-07-01 05:47:37.106 # do intraline marking on the synch pair
2025-07-01 05:47:37.115 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:37.123 if eqi is None:
2025-07-01 05:47:37.132 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:37.144 atags = btags = ""
2025-07-01 05:47:37.156 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:37.169 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:37.181 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:37.192 if tag == 'replace':
2025-07-01 05:47:37.204 atags += '^' * la
2025-07-01 05:47:37.216 btags += '^' * lb
2025-07-01 05:47:37.226 elif tag == 'delete':
2025-07-01 05:47:37.233 atags += '-' * la
2025-07-01 05:47:37.238 elif tag == 'insert':
2025-07-01 05:47:37.243 btags += '+' * lb
2025-07-01 05:47:37.248 elif tag == 'equal':
2025-07-01 05:47:37.253 atags += ' ' * la
2025-07-01 05:47:37.264 btags += ' ' * lb
2025-07-01 05:47:37.271 else:
2025-07-01 05:47:37.279 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:37.286 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:37.293 else:
2025-07-01 05:47:37.298 # the synch pair is identical
2025-07-01 05:47:37.304 yield ' ' + aelt
2025-07-01 05:47:37.309
2025-07-01 05:47:37.313 # pump out diffs from after the synch point
2025-07-01 05:47:37.318 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:37.323
2025-07-01 05:47:37.328 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:37.334 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:37.340
2025-07-01 05:47:37.346 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:37.354 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:37.360 alo = 242, ahi = 1101
2025-07-01 05:47:37.366 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:37.372 blo = 242, bhi = 1101
2025-07-01 05:47:37.377
2025-07-01 05:47:37.383 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:37.390 g = []
2025-07-01 05:47:37.403 if alo < ahi:
2025-07-01 05:47:37.414 if blo < bhi:
2025-07-01 05:47:37.420 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:37.427 else:
2025-07-01 05:47:37.435 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:37.443 elif blo < bhi:
2025-07-01 05:47:37.451 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:37.462
2025-07-01 05:47:37.474 > yield from g
2025-07-01 05:47:37.482
2025-07-01 05:47:37.491 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:37.502 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:37.511
2025-07-01 05:47:37.519 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:37.527 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:37.535 alo = 242, ahi = 1101
2025-07-01 05:47:37.545 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:37.554 blo = 242, bhi = 1101
2025-07-01 05:47:37.561
2025-07-01 05:47:37.568 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:37.573 r"""
2025-07-01 05:47:37.577 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:37.582 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:37.587 synch point, and intraline difference marking is done on the
2025-07-01 05:47:37.592 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:37.598
2025-07-01 05:47:37.603 Example:
2025-07-01 05:47:37.609
2025-07-01 05:47:37.615 >>> d = Differ()
2025-07-01 05:47:37.623 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:37.634 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:37.643 >>> print(''.join(results), end="")
2025-07-01 05:47:37.649 - abcDefghiJkl
2025-07-01 05:47:37.660 + abcdefGhijkl
2025-07-01 05:47:37.669 """
2025-07-01 05:47:37.674
2025-07-01 05:47:37.678 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:37.684 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:37.689 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:37.695 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:37.701 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:37.707
2025-07-01 05:47:37.715 # search for the pair that matches best without being identical
2025-07-01 05:47:37.725 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:37.734 # on junk -- unless we have to)
2025-07-01 05:47:37.742 for j in range(blo, bhi):
2025-07-01 05:47:37.752 bj = b[j]
2025-07-01 05:47:37.762 cruncher.set_seq2(bj)
2025-07-01 05:47:37.770 for i in range(alo, ahi):
2025-07-01 05:47:37.781 ai = a[i]
2025-07-01 05:47:37.791 if ai == bj:
2025-07-01 05:47:37.800 if eqi is None:
2025-07-01 05:47:37.812 eqi, eqj = i, j
2025-07-01 05:47:37.825 continue
2025-07-01 05:47:37.835 cruncher.set_seq1(ai)
2025-07-01 05:47:37.844 # computing similarity is expensive, so use the quick
2025-07-01 05:47:37.852 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:37.859 # compares by a factor of 3.
2025-07-01 05:47:37.866 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:37.872 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:37.878 # of the computation is cached by cruncher
2025-07-01 05:47:37.888 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:37.898 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:37.910 cruncher.ratio() > best_ratio:
2025-07-01 05:47:37.921 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:37.932 if best_ratio < cutoff:
2025-07-01 05:47:37.939 # no non-identical "pretty close" pair
2025-07-01 05:47:37.946 if eqi is None:
2025-07-01 05:47:37.958 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:37.967 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:37.974 return
2025-07-01 05:47:37.980 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:37.991 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:38.001 else:
2025-07-01 05:47:38.008 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:38.015 eqi = None
2025-07-01 05:47:38.022
2025-07-01 05:47:38.032 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:38.041 # identical
2025-07-01 05:47:38.048
2025-07-01 05:47:38.054 # pump out diffs from before the synch point
2025-07-01 05:47:38.059 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:38.065
2025-07-01 05:47:38.070 # do intraline marking on the synch pair
2025-07-01 05:47:38.075 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:38.081 if eqi is None:
2025-07-01 05:47:38.087 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:38.094 atags = btags = ""
2025-07-01 05:47:38.104 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:38.111 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:38.117 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:38.123 if tag == 'replace':
2025-07-01 05:47:38.130 atags += '^' * la
2025-07-01 05:47:38.139 btags += '^' * lb
2025-07-01 05:47:38.149 elif tag == 'delete':
2025-07-01 05:47:38.161 atags += '-' * la
2025-07-01 05:47:38.170 elif tag == 'insert':
2025-07-01 05:47:38.179 btags += '+' * lb
2025-07-01 05:47:38.186 elif tag == 'equal':
2025-07-01 05:47:38.196 atags += ' ' * la
2025-07-01 05:47:38.209 btags += ' ' * lb
2025-07-01 05:47:38.220 else:
2025-07-01 05:47:38.229 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:38.242 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:38.252 else:
2025-07-01 05:47:38.260 # the synch pair is identical
2025-07-01 05:47:38.267 yield ' ' + aelt
2025-07-01 05:47:38.276
2025-07-01 05:47:38.289 # pump out diffs from after the synch point
2025-07-01 05:47:38.299 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:38.306
2025-07-01 05:47:38.314 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:38.325 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:38.334
2025-07-01 05:47:38.344 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:38.353 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:38.360 alo = 243, ahi = 1101
2025-07-01 05:47:38.368 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:38.374 blo = 243, bhi = 1101
2025-07-01 05:47:38.387
2025-07-01 05:47:38.398 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:38.406 g = []
2025-07-01 05:47:38.412 if alo < ahi:
2025-07-01 05:47:38.418 if blo < bhi:
2025-07-01 05:47:38.426 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:38.432 else:
2025-07-01 05:47:38.445 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:38.455 elif blo < bhi:
2025-07-01 05:47:38.464 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:38.471
2025-07-01 05:47:38.478 > yield from g
2025-07-01 05:47:38.484
2025-07-01 05:47:38.490 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:38.499 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:38.506
2025-07-01 05:47:38.518 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:38.528 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:38.535 alo = 243, ahi = 1101
2025-07-01 05:47:38.541 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:38.547 blo = 243, bhi = 1101
2025-07-01 05:47:38.553
2025-07-01 05:47:38.559 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:38.565 r"""
2025-07-01 05:47:38.571 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:38.576 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:38.582 synch point, and intraline difference marking is done on the
2025-07-01 05:47:38.587 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:38.593
2025-07-01 05:47:38.598 Example:
2025-07-01 05:47:38.604
2025-07-01 05:47:38.609 >>> d = Differ()
2025-07-01 05:47:38.615 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:38.621 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:38.626 >>> print(''.join(results), end="")
2025-07-01 05:47:38.632 - abcDefghiJkl
2025-07-01 05:47:38.643 + abcdefGhijkl
2025-07-01 05:47:38.662 """
2025-07-01 05:47:38.669
2025-07-01 05:47:38.674 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:38.679 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:38.683 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:38.688 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:38.693 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:38.697
2025-07-01 05:47:38.704 # search for the pair that matches best without being identical
2025-07-01 05:47:38.711 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:38.716 # on junk -- unless we have to)
2025-07-01 05:47:38.722 for j in range(blo, bhi):
2025-07-01 05:47:38.729 bj = b[j]
2025-07-01 05:47:38.736 cruncher.set_seq2(bj)
2025-07-01 05:47:38.743 for i in range(alo, ahi):
2025-07-01 05:47:38.751 ai = a[i]
2025-07-01 05:47:38.763 if ai == bj:
2025-07-01 05:47:38.771 if eqi is None:
2025-07-01 05:47:38.776 eqi, eqj = i, j
2025-07-01 05:47:38.782 continue
2025-07-01 05:47:38.788 cruncher.set_seq1(ai)
2025-07-01 05:47:38.794 # computing similarity is expensive, so use the quick
2025-07-01 05:47:38.801 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:38.808 # compares by a factor of 3.
2025-07-01 05:47:38.816 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:38.823 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:38.831 # of the computation is cached by cruncher
2025-07-01 05:47:38.839 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:38.849 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:38.861 cruncher.ratio() > best_ratio:
2025-07-01 05:47:38.869 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:38.876 if best_ratio < cutoff:
2025-07-01 05:47:38.882 # no non-identical "pretty close" pair
2025-07-01 05:47:38.888 if eqi is None:
2025-07-01 05:47:38.894 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:38.900 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:38.906 return
2025-07-01 05:47:38.916 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:38.926 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:38.935 else:
2025-07-01 05:47:38.946 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:38.955 eqi = None
2025-07-01 05:47:38.963
2025-07-01 05:47:38.973 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:38.982 # identical
2025-07-01 05:47:38.989
2025-07-01 05:47:38.996 # pump out diffs from before the synch point
2025-07-01 05:47:39.010 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:39.018
2025-07-01 05:47:39.024 # do intraline marking on the synch pair
2025-07-01 05:47:39.033 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:39.045 if eqi is None:
2025-07-01 05:47:39.053 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:39.058 atags = btags = ""
2025-07-01 05:47:39.063 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:39.068 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:39.072 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:39.076 if tag == 'replace':
2025-07-01 05:47:39.082 atags += '^' * la
2025-07-01 05:47:39.087 btags += '^' * lb
2025-07-01 05:47:39.095 elif tag == 'delete':
2025-07-01 05:47:39.106 atags += '-' * la
2025-07-01 05:47:39.114 elif tag == 'insert':
2025-07-01 05:47:39.125 btags += '+' * lb
2025-07-01 05:47:39.135 elif tag == 'equal':
2025-07-01 05:47:39.146 atags += ' ' * la
2025-07-01 05:47:39.158 btags += ' ' * lb
2025-07-01 05:47:39.168 else:
2025-07-01 05:47:39.181 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:39.191 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:39.200 else:
2025-07-01 05:47:39.207 # the synch pair is identical
2025-07-01 05:47:39.221 yield ' ' + aelt
2025-07-01 05:47:39.233
2025-07-01 05:47:39.242 # pump out diffs from after the synch point
2025-07-01 05:47:39.250 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:39.260
2025-07-01 05:47:39.270 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:39.282 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:39.293
2025-07-01 05:47:39.302 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:39.310 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:39.323 alo = 246, ahi = 1101
2025-07-01 05:47:39.336 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:39.343 blo = 246, bhi = 1101
2025-07-01 05:47:39.349
2025-07-01 05:47:39.357 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:39.366 g = []
2025-07-01 05:47:39.374 if alo < ahi:
2025-07-01 05:47:39.385 if blo < bhi:
2025-07-01 05:47:39.393 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:39.400 else:
2025-07-01 05:47:39.407 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:39.414 elif blo < bhi:
2025-07-01 05:47:39.423 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:39.431
2025-07-01 05:47:39.439 > yield from g
2025-07-01 05:47:39.448
2025-07-01 05:47:39.458 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:39.466 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:39.472
2025-07-01 05:47:39.483 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:39.492 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:39.501 alo = 246, ahi = 1101
2025-07-01 05:47:39.513 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:39.523 blo = 246, bhi = 1101
2025-07-01 05:47:39.531
2025-07-01 05:47:39.538 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:39.549 r"""
2025-07-01 05:47:39.556 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:39.564 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:39.572 synch point, and intraline difference marking is done on the
2025-07-01 05:47:39.579 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:39.587
2025-07-01 05:47:39.598 Example:
2025-07-01 05:47:39.607
2025-07-01 05:47:39.614 >>> d = Differ()
2025-07-01 05:47:39.622 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:39.629 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:39.636 >>> print(''.join(results), end="")
2025-07-01 05:47:39.647 - abcDefghiJkl
2025-07-01 05:47:39.664 + abcdefGhijkl
2025-07-01 05:47:39.676 """
2025-07-01 05:47:39.682
2025-07-01 05:47:39.694 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:39.704 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:39.712 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:39.718 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:39.723 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:39.728
2025-07-01 05:47:39.733 # search for the pair that matches best without being identical
2025-07-01 05:47:39.738 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:39.742 # on junk -- unless we have to)
2025-07-01 05:47:39.746 for j in range(blo, bhi):
2025-07-01 05:47:39.751 bj = b[j]
2025-07-01 05:47:39.762 cruncher.set_seq2(bj)
2025-07-01 05:47:39.770 for i in range(alo, ahi):
2025-07-01 05:47:39.778 ai = a[i]
2025-07-01 05:47:39.786 if ai == bj:
2025-07-01 05:47:39.793 if eqi is None:
2025-07-01 05:47:39.799 eqi, eqj = i, j
2025-07-01 05:47:39.807 continue
2025-07-01 05:47:39.817 cruncher.set_seq1(ai)
2025-07-01 05:47:39.826 # computing similarity is expensive, so use the quick
2025-07-01 05:47:39.833 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:39.840 # compares by a factor of 3.
2025-07-01 05:47:39.845 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:39.857 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:39.868 # of the computation is cached by cruncher
2025-07-01 05:47:39.877 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:39.885 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:39.892 cruncher.ratio() > best_ratio:
2025-07-01 05:47:39.898 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:39.904 if best_ratio < cutoff:
2025-07-01 05:47:39.911 # no non-identical "pretty close" pair
2025-07-01 05:47:39.921 if eqi is None:
2025-07-01 05:47:39.930 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:39.937 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:39.943 return
2025-07-01 05:47:39.950 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:39.956 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:39.964 else:
2025-07-01 05:47:39.975 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:39.983 eqi = None
2025-07-01 05:47:39.990
2025-07-01 05:47:39.997 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:40.003 # identical
2025-07-01 05:47:40.009
2025-07-01 05:47:40.015 # pump out diffs from before the synch point
2025-07-01 05:47:40.022 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:40.030
2025-07-01 05:47:40.036 # do intraline marking on the synch pair
2025-07-01 05:47:40.042 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:40.047 if eqi is None:
2025-07-01 05:47:40.053 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:40.060 atags = btags = ""
2025-07-01 05:47:40.065 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:40.071 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:40.076 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:40.083 if tag == 'replace':
2025-07-01 05:47:40.095 atags += '^' * la
2025-07-01 05:47:40.109 btags += '^' * lb
2025-07-01 05:47:40.117 elif tag == 'delete':
2025-07-01 05:47:40.124 atags += '-' * la
2025-07-01 05:47:40.130 elif tag == 'insert':
2025-07-01 05:47:40.136 btags += '+' * lb
2025-07-01 05:47:40.141 elif tag == 'equal':
2025-07-01 05:47:40.145 atags += ' ' * la
2025-07-01 05:47:40.150 btags += ' ' * lb
2025-07-01 05:47:40.154 else:
2025-07-01 05:47:40.159 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:40.166 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:40.172 else:
2025-07-01 05:47:40.177 # the synch pair is identical
2025-07-01 05:47:40.183 yield ' ' + aelt
2025-07-01 05:47:40.190
2025-07-01 05:47:40.200 # pump out diffs from after the synch point
2025-07-01 05:47:40.210 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:40.218
2025-07-01 05:47:40.227 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:40.236 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:40.243
2025-07-01 05:47:40.250 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:40.260 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:40.270 alo = 247, ahi = 1101
2025-07-01 05:47:40.279 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:40.286 blo = 247, bhi = 1101
2025-07-01 05:47:40.292
2025-07-01 05:47:40.299 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:40.304 g = []
2025-07-01 05:47:40.310 if alo < ahi:
2025-07-01 05:47:40.320 if blo < bhi:
2025-07-01 05:47:40.333 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:40.344 else:
2025-07-01 05:47:40.355 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:40.364 elif blo < bhi:
2025-07-01 05:47:40.371 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:40.377
2025-07-01 05:47:40.381 > yield from g
2025-07-01 05:47:40.386
2025-07-01 05:47:40.391 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:40.399 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:40.409
2025-07-01 05:47:40.415 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:40.422 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:40.434 alo = 247, ahi = 1101
2025-07-01 05:47:40.446 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:40.455 blo = 247, bhi = 1101
2025-07-01 05:47:40.463
2025-07-01 05:47:40.470 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:40.475 r"""
2025-07-01 05:47:40.480 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:40.485 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:40.489 synch point, and intraline difference marking is done on the
2025-07-01 05:47:40.495 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:40.500
2025-07-01 05:47:40.510 Example:
2025-07-01 05:47:40.519
2025-07-01 05:47:40.527 >>> d = Differ()
2025-07-01 05:47:40.535 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:40.542 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:40.553 >>> print(''.join(results), end="")
2025-07-01 05:47:40.566 - abcDefghiJkl
2025-07-01 05:47:40.585 + abcdefGhijkl
2025-07-01 05:47:40.601 """
2025-07-01 05:47:40.607
2025-07-01 05:47:40.614 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:40.623 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:40.630 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:40.639 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:40.650 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:40.658
2025-07-01 05:47:40.664 # search for the pair that matches best without being identical
2025-07-01 05:47:40.670 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:40.675 # on junk -- unless we have to)
2025-07-01 05:47:40.683 for j in range(blo, bhi):
2025-07-01 05:47:40.693 bj = b[j]
2025-07-01 05:47:40.702 cruncher.set_seq2(bj)
2025-07-01 05:47:40.708 for i in range(alo, ahi):
2025-07-01 05:47:40.714 ai = a[i]
2025-07-01 05:47:40.721 if ai == bj:
2025-07-01 05:47:40.732 if eqi is None:
2025-07-01 05:47:40.743 eqi, eqj = i, j
2025-07-01 05:47:40.753 continue
2025-07-01 05:47:40.760 cruncher.set_seq1(ai)
2025-07-01 05:47:40.767 # computing similarity is expensive, so use the quick
2025-07-01 05:47:40.780 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:40.787 # compares by a factor of 3.
2025-07-01 05:47:40.794 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:40.803 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:40.814 # of the computation is cached by cruncher
2025-07-01 05:47:40.822 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:40.829 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:40.835 cruncher.ratio() > best_ratio:
2025-07-01 05:47:40.841 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:40.851 if best_ratio < cutoff:
2025-07-01 05:47:40.864 # no non-identical "pretty close" pair
2025-07-01 05:47:40.871 if eqi is None:
2025-07-01 05:47:40.881 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:40.892 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:40.904 return
2025-07-01 05:47:40.913 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:40.921 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:40.928 else:
2025-07-01 05:47:40.940 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:40.948 eqi = None
2025-07-01 05:47:40.957
2025-07-01 05:47:40.964 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:40.969 # identical
2025-07-01 05:47:40.974
2025-07-01 05:47:40.979 # pump out diffs from before the synch point
2025-07-01 05:47:40.985 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:40.990
2025-07-01 05:47:40.997 # do intraline marking on the synch pair
2025-07-01 05:47:41.005 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:41.012 if eqi is None:
2025-07-01 05:47:41.018 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:41.024 atags = btags = ""
2025-07-01 05:47:41.031 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:41.039 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:41.046 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:41.054 if tag == 'replace':
2025-07-01 05:47:41.061 atags += '^' * la
2025-07-01 05:47:41.069 btags += '^' * lb
2025-07-01 05:47:41.077 elif tag == 'delete':
2025-07-01 05:47:41.084 atags += '-' * la
2025-07-01 05:47:41.089 elif tag == 'insert':
2025-07-01 05:47:41.094 btags += '+' * lb
2025-07-01 05:47:41.099 elif tag == 'equal':
2025-07-01 05:47:41.104 atags += ' ' * la
2025-07-01 05:47:41.110 btags += ' ' * lb
2025-07-01 05:47:41.116 else:
2025-07-01 05:47:41.123 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:41.134 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:41.143 else:
2025-07-01 05:47:41.151 # the synch pair is identical
2025-07-01 05:47:41.158 yield ' ' + aelt
2025-07-01 05:47:41.165
2025-07-01 05:47:41.173 # pump out diffs from after the synch point
2025-07-01 05:47:41.180 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:41.186
2025-07-01 05:47:41.193 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:41.200 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:41.207
2025-07-01 05:47:41.221 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:41.232 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:41.239 alo = 248, ahi = 1101
2025-07-01 05:47:41.247 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:41.255 blo = 248, bhi = 1101
2025-07-01 05:47:41.265
2025-07-01 05:47:41.274 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:41.281 g = []
2025-07-01 05:47:41.287 if alo < ahi:
2025-07-01 05:47:41.293 if blo < bhi:
2025-07-01 05:47:41.299 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:41.304 else:
2025-07-01 05:47:41.309 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:41.315 elif blo < bhi:
2025-07-01 05:47:41.320 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:41.325
2025-07-01 05:47:41.331 > yield from g
2025-07-01 05:47:41.341
2025-07-01 05:47:41.348 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:41.358 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:41.365
2025-07-01 05:47:41.372 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:41.380 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:41.387 alo = 248, ahi = 1101
2025-07-01 05:47:41.395 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:41.402 blo = 248, bhi = 1101
2025-07-01 05:47:41.411
2025-07-01 05:47:41.422 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:41.432 r"""
2025-07-01 05:47:41.439 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:41.450 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:41.458 synch point, and intraline difference marking is done on the
2025-07-01 05:47:41.464 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:41.470
2025-07-01 05:47:41.476 Example:
2025-07-01 05:47:41.483
2025-07-01 05:47:41.491 >>> d = Differ()
2025-07-01 05:47:41.502 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:41.509 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:41.515 >>> print(''.join(results), end="")
2025-07-01 05:47:41.522 - abcDefghiJkl
2025-07-01 05:47:41.541 + abcdefGhijkl
2025-07-01 05:47:41.554 """
2025-07-01 05:47:41.560
2025-07-01 05:47:41.566 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:41.572 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:41.578 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:41.586 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:41.597 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:41.606
2025-07-01 05:47:41.613 # search for the pair that matches best without being identical
2025-07-01 05:47:41.620 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:41.627 # on junk -- unless we have to)
2025-07-01 05:47:41.635 for j in range(blo, bhi):
2025-07-01 05:47:41.645 bj = b[j]
2025-07-01 05:47:41.652 cruncher.set_seq2(bj)
2025-07-01 05:47:41.658 for i in range(alo, ahi):
2025-07-01 05:47:41.663 ai = a[i]
2025-07-01 05:47:41.668 if ai == bj:
2025-07-01 05:47:41.673 if eqi is None:
2025-07-01 05:47:41.681 eqi, eqj = i, j
2025-07-01 05:47:41.686 continue
2025-07-01 05:47:41.692 cruncher.set_seq1(ai)
2025-07-01 05:47:41.698 # computing similarity is expensive, so use the quick
2025-07-01 05:47:41.705 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:41.711 # compares by a factor of 3.
2025-07-01 05:47:41.717 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:41.725 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:41.732 # of the computation is cached by cruncher
2025-07-01 05:47:41.738 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:41.746 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:41.751 cruncher.ratio() > best_ratio:
2025-07-01 05:47:41.759 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:41.769 if best_ratio < cutoff:
2025-07-01 05:47:41.778 # no non-identical "pretty close" pair
2025-07-01 05:47:41.788 if eqi is None:
2025-07-01 05:47:41.800 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:41.810 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:41.820 return
2025-07-01 05:47:41.829 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:41.840 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:41.850 else:
2025-07-01 05:47:41.861 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:41.872 eqi = None
2025-07-01 05:47:41.880
2025-07-01 05:47:41.888 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:41.895 # identical
2025-07-01 05:47:41.901
2025-07-01 05:47:41.907 # pump out diffs from before the synch point
2025-07-01 05:47:41.912 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:41.918
2025-07-01 05:47:41.923 # do intraline marking on the synch pair
2025-07-01 05:47:41.931 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:41.941 if eqi is None:
2025-07-01 05:47:41.950 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:41.957 atags = btags = ""
2025-07-01 05:47:41.963 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:41.970 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:41.980 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:41.988 if tag == 'replace':
2025-07-01 05:47:41.995 atags += '^' * la
2025-07-01 05:47:42.002 btags += '^' * lb
2025-07-01 05:47:42.008 elif tag == 'delete':
2025-07-01 05:47:42.014 atags += '-' * la
2025-07-01 05:47:42.019 elif tag == 'insert':
2025-07-01 05:47:42.025 btags += '+' * lb
2025-07-01 05:47:42.032 elif tag == 'equal':
2025-07-01 05:47:42.038 atags += ' ' * la
2025-07-01 05:47:42.043 btags += ' ' * lb
2025-07-01 05:47:42.049 else:
2025-07-01 05:47:42.055 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:42.061 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:42.067 else:
2025-07-01 05:47:42.073 # the synch pair is identical
2025-07-01 05:47:42.079 yield ' ' + aelt
2025-07-01 05:47:42.086
2025-07-01 05:47:42.093 # pump out diffs from after the synch point
2025-07-01 05:47:42.100 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:42.107
2025-07-01 05:47:42.121 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:42.131 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:42.142
2025-07-01 05:47:42.150 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:42.157 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:42.164 alo = 249, ahi = 1101
2025-07-01 05:47:42.170 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:42.175 blo = 249, bhi = 1101
2025-07-01 05:47:42.181
2025-07-01 05:47:42.186 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:42.192 g = []
2025-07-01 05:47:42.199 if alo < ahi:
2025-07-01 05:47:42.209 if blo < bhi:
2025-07-01 05:47:42.220 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:42.228 else:
2025-07-01 05:47:42.235 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:42.243 elif blo < bhi:
2025-07-01 05:47:42.254 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:42.262
2025-07-01 05:47:42.268 > yield from g
2025-07-01 05:47:42.274
2025-07-01 05:47:42.284 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:42.294 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:42.302
2025-07-01 05:47:42.313 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:42.323 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:42.331 alo = 249, ahi = 1101
2025-07-01 05:47:42.338 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:42.344 blo = 249, bhi = 1101
2025-07-01 05:47:42.350
2025-07-01 05:47:42.356 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:42.369 r"""
2025-07-01 05:47:42.379 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:42.389 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:42.395 synch point, and intraline difference marking is done on the
2025-07-01 05:47:42.408 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:42.418
2025-07-01 05:47:42.425 Example:
2025-07-01 05:47:42.430
2025-07-01 05:47:42.435 >>> d = Differ()
2025-07-01 05:47:42.440 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:42.444 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:42.449 >>> print(''.join(results), end="")
2025-07-01 05:47:42.453 - abcDefghiJkl
2025-07-01 05:47:42.464 + abcdefGhijkl
2025-07-01 05:47:42.475 """
2025-07-01 05:47:42.484
2025-07-01 05:47:42.491 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:42.496 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:42.501 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:42.506 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:42.510 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:42.514
2025-07-01 05:47:42.519 # search for the pair that matches best without being identical
2025-07-01 05:47:42.523 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:42.528 # on junk -- unless we have to)
2025-07-01 05:47:42.533 for j in range(blo, bhi):
2025-07-01 05:47:42.537 bj = b[j]
2025-07-01 05:47:42.541 cruncher.set_seq2(bj)
2025-07-01 05:47:42.546 for i in range(alo, ahi):
2025-07-01 05:47:42.556 ai = a[i]
2025-07-01 05:47:42.562 if ai == bj:
2025-07-01 05:47:42.568 if eqi is None:
2025-07-01 05:47:42.578 eqi, eqj = i, j
2025-07-01 05:47:42.586 continue
2025-07-01 05:47:42.594 cruncher.set_seq1(ai)
2025-07-01 05:47:42.599 # computing similarity is expensive, so use the quick
2025-07-01 05:47:42.604 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:42.609 # compares by a factor of 3.
2025-07-01 05:47:42.617 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:42.629 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:42.638 # of the computation is cached by cruncher
2025-07-01 05:47:42.649 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:42.658 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:42.665 cruncher.ratio() > best_ratio:
2025-07-01 05:47:42.672 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:42.677 if best_ratio < cutoff:
2025-07-01 05:47:42.683 # no non-identical "pretty close" pair
2025-07-01 05:47:42.688 if eqi is None:
2025-07-01 05:47:42.695 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:42.701 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:42.706 return
2025-07-01 05:47:42.717 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:42.727 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:42.737 else:
2025-07-01 05:47:42.750 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:42.758 eqi = None
2025-07-01 05:47:42.769
2025-07-01 05:47:42.779 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:42.785 # identical
2025-07-01 05:47:42.792
2025-07-01 05:47:42.798 # pump out diffs from before the synch point
2025-07-01 05:47:42.804 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:42.810
2025-07-01 05:47:42.816 # do intraline marking on the synch pair
2025-07-01 05:47:42.822 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:42.831 if eqi is None:
2025-07-01 05:47:42.841 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:42.849 atags = btags = ""
2025-07-01 05:47:42.855 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:42.861 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:42.866 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:42.871 if tag == 'replace':
2025-07-01 05:47:42.875 atags += '^' * la
2025-07-01 05:47:42.880 btags += '^' * lb
2025-07-01 05:47:42.884 elif tag == 'delete':
2025-07-01 05:47:42.890 atags += '-' * la
2025-07-01 05:47:42.895 elif tag == 'insert':
2025-07-01 05:47:42.901 btags += '+' * lb
2025-07-01 05:47:42.906 elif tag == 'equal':
2025-07-01 05:47:42.913 atags += ' ' * la
2025-07-01 05:47:42.921 btags += ' ' * lb
2025-07-01 05:47:42.927 else:
2025-07-01 05:47:42.933 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:42.938 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:42.944 else:
2025-07-01 05:47:42.950 # the synch pair is identical
2025-07-01 05:47:42.956 yield ' ' + aelt
2025-07-01 05:47:42.962
2025-07-01 05:47:42.969 # pump out diffs from after the synch point
2025-07-01 05:47:42.979 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:42.986
2025-07-01 05:47:42.992 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:42.998 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:43.011
2025-07-01 05:47:43.021 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:43.030 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:43.040 alo = 250, ahi = 1101
2025-07-01 05:47:43.048 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:43.055 blo = 250, bhi = 1101
2025-07-01 05:47:43.061
2025-07-01 05:47:43.067 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:43.072 g = []
2025-07-01 05:47:43.078 if alo < ahi:
2025-07-01 05:47:43.084 if blo < bhi:
2025-07-01 05:47:43.092 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:43.099 else:
2025-07-01 05:47:43.107 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:43.118 elif blo < bhi:
2025-07-01 05:47:43.129 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:43.140
2025-07-01 05:47:43.153 > yield from g
2025-07-01 05:47:43.166
2025-07-01 05:47:43.177 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:43.190 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:43.203
2025-07-01 05:47:43.214 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:43.230 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:43.240 alo = 250, ahi = 1101
2025-07-01 05:47:43.254 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:43.266 blo = 250, bhi = 1101
2025-07-01 05:47:43.275
2025-07-01 05:47:43.283 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:43.290 r"""
2025-07-01 05:47:43.298 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:43.306 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:43.314 synch point, and intraline difference marking is done on the
2025-07-01 05:47:43.321 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:43.327
2025-07-01 05:47:43.334 Example:
2025-07-01 05:47:43.345
2025-07-01 05:47:43.350 >>> d = Differ()
2025-07-01 05:47:43.356 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:43.362 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:43.368 >>> print(''.join(results), end="")
2025-07-01 05:47:43.376 - abcDefghiJkl
2025-07-01 05:47:43.399 + abcdefGhijkl
2025-07-01 05:47:43.415 """
2025-07-01 05:47:43.420
2025-07-01 05:47:43.426 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:43.432 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:43.437 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:43.441 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:43.446 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:43.450
2025-07-01 05:47:43.456 # search for the pair that matches best without being identical
2025-07-01 05:47:43.461 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:43.467 # on junk -- unless we have to)
2025-07-01 05:47:43.473 for j in range(blo, bhi):
2025-07-01 05:47:43.481 bj = b[j]
2025-07-01 05:47:43.488 cruncher.set_seq2(bj)
2025-07-01 05:47:43.498 for i in range(alo, ahi):
2025-07-01 05:47:43.507 ai = a[i]
2025-07-01 05:47:43.518 if ai == bj:
2025-07-01 05:47:43.526 if eqi is None:
2025-07-01 05:47:43.532 eqi, eqj = i, j
2025-07-01 05:47:43.539 continue
2025-07-01 05:47:43.546 cruncher.set_seq1(ai)
2025-07-01 05:47:43.556 # computing similarity is expensive, so use the quick
2025-07-01 05:47:43.565 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:43.571 # compares by a factor of 3.
2025-07-01 05:47:43.577 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:43.582 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:43.588 # of the computation is cached by cruncher
2025-07-01 05:47:43.594 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:43.600 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:43.606 cruncher.ratio() > best_ratio:
2025-07-01 05:47:43.612 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:43.619 if best_ratio < cutoff:
2025-07-01 05:47:43.624 # no non-identical "pretty close" pair
2025-07-01 05:47:43.630 if eqi is None:
2025-07-01 05:47:43.641 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:43.651 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:43.659 return
2025-07-01 05:47:43.665 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:43.671 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:43.677 else:
2025-07-01 05:47:43.683 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:43.690 eqi = None
2025-07-01 05:47:43.700
2025-07-01 05:47:43.707 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:43.713 # identical
2025-07-01 05:47:43.725
2025-07-01 05:47:43.736 # pump out diffs from before the synch point
2025-07-01 05:47:43.745 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:43.752
2025-07-01 05:47:43.762 # do intraline marking on the synch pair
2025-07-01 05:47:43.768 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:43.775 if eqi is None:
2025-07-01 05:47:43.782 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:43.791 atags = btags = ""
2025-07-01 05:47:43.802 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:43.816 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:43.825 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:43.832 if tag == 'replace':
2025-07-01 05:47:43.844 atags += '^' * la
2025-07-01 05:47:43.855 btags += '^' * lb
2025-07-01 05:47:43.863 elif tag == 'delete':
2025-07-01 05:47:43.870 atags += '-' * la
2025-07-01 05:47:43.876 elif tag == 'insert':
2025-07-01 05:47:43.882 btags += '+' * lb
2025-07-01 05:47:43.889 elif tag == 'equal':
2025-07-01 05:47:43.896 atags += ' ' * la
2025-07-01 05:47:43.903 btags += ' ' * lb
2025-07-01 05:47:43.910 else:
2025-07-01 05:47:43.921 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:43.932 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:43.939 else:
2025-07-01 05:47:43.948 # the synch pair is identical
2025-07-01 05:47:43.958 yield ' ' + aelt
2025-07-01 05:47:43.966
2025-07-01 05:47:43.973 # pump out diffs from after the synch point
2025-07-01 05:47:43.982 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:43.992
2025-07-01 05:47:44.000 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:44.007 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:44.011
2025-07-01 05:47:44.016 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:44.021 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:44.026 alo = 251, ahi = 1101
2025-07-01 05:47:44.032 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:44.038 blo = 251, bhi = 1101
2025-07-01 05:47:44.043
2025-07-01 05:47:44.049 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:44.055 g = []
2025-07-01 05:47:44.066 if alo < ahi:
2025-07-01 05:47:44.075 if blo < bhi:
2025-07-01 05:47:44.082 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:44.088 else:
2025-07-01 05:47:44.094 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:44.105 elif blo < bhi:
2025-07-01 05:47:44.115 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:44.122
2025-07-01 05:47:44.128 > yield from g
2025-07-01 05:47:44.134
2025-07-01 05:47:44.139 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:44.144 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:44.148
2025-07-01 05:47:44.153 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:44.167 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:44.173 alo = 251, ahi = 1101
2025-07-01 05:47:44.180 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:44.186 blo = 251, bhi = 1101
2025-07-01 05:47:44.198
2025-07-01 05:47:44.204 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:44.210 r"""
2025-07-01 05:47:44.218 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:44.226 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:44.233 synch point, and intraline difference marking is done on the
2025-07-01 05:47:44.240 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:44.246
2025-07-01 05:47:44.252 Example:
2025-07-01 05:47:44.258
2025-07-01 05:47:44.264 >>> d = Differ()
2025-07-01 05:47:44.271 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:44.279 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:44.286 >>> print(''.join(results), end="")
2025-07-01 05:47:44.292 - abcDefghiJkl
2025-07-01 05:47:44.305 + abcdefGhijkl
2025-07-01 05:47:44.318 """
2025-07-01 05:47:44.325
2025-07-01 05:47:44.332 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:44.339 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:44.346 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:44.358 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:44.367 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:44.374
2025-07-01 05:47:44.387 # search for the pair that matches best without being identical
2025-07-01 05:47:44.397 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:44.404 # on junk -- unless we have to)
2025-07-01 05:47:44.410 for j in range(blo, bhi):
2025-07-01 05:47:44.416 bj = b[j]
2025-07-01 05:47:44.423 cruncher.set_seq2(bj)
2025-07-01 05:47:44.434 for i in range(alo, ahi):
2025-07-01 05:47:44.445 ai = a[i]
2025-07-01 05:47:44.456 if ai == bj:
2025-07-01 05:47:44.467 if eqi is None:
2025-07-01 05:47:44.474 eqi, eqj = i, j
2025-07-01 05:47:44.482 continue
2025-07-01 05:47:44.492 cruncher.set_seq1(ai)
2025-07-01 05:47:44.500 # computing similarity is expensive, so use the quick
2025-07-01 05:47:44.507 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:44.515 # compares by a factor of 3.
2025-07-01 05:47:44.525 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:44.534 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:44.542 # of the computation is cached by cruncher
2025-07-01 05:47:44.553 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:44.565 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:44.577 cruncher.ratio() > best_ratio:
2025-07-01 05:47:44.587 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:44.595 if best_ratio < cutoff:
2025-07-01 05:47:44.602 # no non-identical "pretty close" pair
2025-07-01 05:47:44.608 if eqi is None:
2025-07-01 05:47:44.614 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:44.620 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:44.626 return
2025-07-01 05:47:44.636 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:44.646 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:44.653 else:
2025-07-01 05:47:44.660 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:44.666 eqi = None
2025-07-01 05:47:44.673
2025-07-01 05:47:44.680 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:44.685 # identical
2025-07-01 05:47:44.691
2025-07-01 05:47:44.696 # pump out diffs from before the synch point
2025-07-01 05:47:44.702 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:44.707
2025-07-01 05:47:44.712 # do intraline marking on the synch pair
2025-07-01 05:47:44.718 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:44.725 if eqi is None:
2025-07-01 05:47:44.731 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:44.737 atags = btags = ""
2025-07-01 05:47:44.743 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:44.748 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:44.754 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:44.764 if tag == 'replace':
2025-07-01 05:47:44.774 atags += '^' * la
2025-07-01 05:47:44.784 btags += '^' * lb
2025-07-01 05:47:44.797 elif tag == 'delete':
2025-07-01 05:47:44.807 atags += '-' * la
2025-07-01 05:47:44.819 elif tag == 'insert':
2025-07-01 05:47:44.833 btags += '+' * lb
2025-07-01 05:47:44.842 elif tag == 'equal':
2025-07-01 05:47:44.849 atags += ' ' * la
2025-07-01 05:47:44.860 btags += ' ' * lb
2025-07-01 05:47:44.869 else:
2025-07-01 05:47:44.877 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:44.883 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:44.890 else:
2025-07-01 05:47:44.900 # the synch pair is identical
2025-07-01 05:47:44.910 yield ' ' + aelt
2025-07-01 05:47:44.919
2025-07-01 05:47:44.927 # pump out diffs from after the synch point
2025-07-01 05:47:44.934 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:44.940
2025-07-01 05:47:44.947 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:44.954 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:44.961
2025-07-01 05:47:44.973 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:44.982 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:44.989 alo = 252, ahi = 1101
2025-07-01 05:47:44.995 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:45.004 blo = 252, bhi = 1101
2025-07-01 05:47:45.014
2025-07-01 05:47:45.022 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:45.029 g = []
2025-07-01 05:47:45.035 if alo < ahi:
2025-07-01 05:47:45.040 if blo < bhi:
2025-07-01 05:47:45.045 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:45.049 else:
2025-07-01 05:47:45.054 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:45.058 elif blo < bhi:
2025-07-01 05:47:45.063 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:45.067
2025-07-01 05:47:45.072 > yield from g
2025-07-01 05:47:45.077
2025-07-01 05:47:45.081 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:45.086 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:45.090
2025-07-01 05:47:45.095 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:45.100 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:45.105 alo = 252, ahi = 1101
2025-07-01 05:47:45.110 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:45.114 blo = 252, bhi = 1101
2025-07-01 05:47:45.119
2025-07-01 05:47:45.123 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:45.128 r"""
2025-07-01 05:47:45.134 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:45.139 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:45.143 synch point, and intraline difference marking is done on the
2025-07-01 05:47:45.149 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:45.154
2025-07-01 05:47:45.159 Example:
2025-07-01 05:47:45.164
2025-07-01 05:47:45.168 >>> d = Differ()
2025-07-01 05:47:45.173 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:45.178 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:45.182 >>> print(''.join(results), end="")
2025-07-01 05:47:45.188 - abcDefghiJkl
2025-07-01 05:47:45.199 + abcdefGhijkl
2025-07-01 05:47:45.210 """
2025-07-01 05:47:45.215
2025-07-01 05:47:45.220 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:45.224 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:45.229 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:45.233 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:45.238 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:45.243
2025-07-01 05:47:45.248 # search for the pair that matches best without being identical
2025-07-01 05:47:45.253 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:45.264 # on junk -- unless we have to)
2025-07-01 05:47:45.274 for j in range(blo, bhi):
2025-07-01 05:47:45.280 bj = b[j]
2025-07-01 05:47:45.286 cruncher.set_seq2(bj)
2025-07-01 05:47:45.293 for i in range(alo, ahi):
2025-07-01 05:47:45.304 ai = a[i]
2025-07-01 05:47:45.315 if ai == bj:
2025-07-01 05:47:45.325 if eqi is None:
2025-07-01 05:47:45.332 eqi, eqj = i, j
2025-07-01 05:47:45.340 continue
2025-07-01 05:47:45.348 cruncher.set_seq1(ai)
2025-07-01 05:47:45.354 # computing similarity is expensive, so use the quick
2025-07-01 05:47:45.360 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:45.366 # compares by a factor of 3.
2025-07-01 05:47:45.372 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:45.378 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:45.384 # of the computation is cached by cruncher
2025-07-01 05:47:45.390 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:45.396 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:45.409 cruncher.ratio() > best_ratio:
2025-07-01 05:47:45.419 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:45.428 if best_ratio < cutoff:
2025-07-01 05:47:45.436 # no non-identical "pretty close" pair
2025-07-01 05:47:45.442 if eqi is None:
2025-07-01 05:47:45.449 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:45.455 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:45.463 return
2025-07-01 05:47:45.473 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:45.481 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:45.487 else:
2025-07-01 05:47:45.495 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:45.504 eqi = None
2025-07-01 05:47:45.513
2025-07-01 05:47:45.521 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:45.528 # identical
2025-07-01 05:47:45.533
2025-07-01 05:47:45.538 # pump out diffs from before the synch point
2025-07-01 05:47:45.543 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:45.547
2025-07-01 05:47:45.552 # do intraline marking on the synch pair
2025-07-01 05:47:45.556 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:45.567 if eqi is None:
2025-07-01 05:47:45.577 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:45.585 atags = btags = ""
2025-07-01 05:47:45.593 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:45.599 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:45.607 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:45.618 if tag == 'replace':
2025-07-01 05:47:45.630 atags += '^' * la
2025-07-01 05:47:45.639 btags += '^' * lb
2025-07-01 05:47:45.647 elif tag == 'delete':
2025-07-01 05:47:45.654 atags += '-' * la
2025-07-01 05:47:45.662 elif tag == 'insert':
2025-07-01 05:47:45.673 btags += '+' * lb
2025-07-01 05:47:45.681 elif tag == 'equal':
2025-07-01 05:47:45.688 atags += ' ' * la
2025-07-01 05:47:45.700 btags += ' ' * lb
2025-07-01 05:47:45.712 else:
2025-07-01 05:47:45.724 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:45.734 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:45.743 else:
2025-07-01 05:47:45.754 # the synch pair is identical
2025-07-01 05:47:45.766 yield ' ' + aelt
2025-07-01 05:47:45.778
2025-07-01 05:47:45.790 # pump out diffs from after the synch point
2025-07-01 05:47:45.800 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:45.807
2025-07-01 05:47:45.814 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:45.825 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:45.833
2025-07-01 05:47:45.841 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:45.853 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:45.867 alo = 253, ahi = 1101
2025-07-01 05:47:45.877 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:45.886 blo = 253, bhi = 1101
2025-07-01 05:47:45.894
2025-07-01 05:47:45.903 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:45.909 g = []
2025-07-01 05:47:45.916 if alo < ahi:
2025-07-01 05:47:45.922 if blo < bhi:
2025-07-01 05:47:45.928 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:45.935 else:
2025-07-01 05:47:45.941 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:45.947 elif blo < bhi:
2025-07-01 05:47:45.952 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:45.958
2025-07-01 05:47:45.967 > yield from g
2025-07-01 05:47:45.974
2025-07-01 05:47:45.981 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:45.987 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:45.993
2025-07-01 05:47:45.998 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:46.010 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:46.015 alo = 253, ahi = 1101
2025-07-01 05:47:46.021 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:46.026 blo = 253, bhi = 1101
2025-07-01 05:47:46.030
2025-07-01 05:47:46.036 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:46.040 r"""
2025-07-01 05:47:46.045 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:46.050 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:46.055 synch point, and intraline difference marking is done on the
2025-07-01 05:47:46.060 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:46.068
2025-07-01 05:47:46.075 Example:
2025-07-01 05:47:46.082
2025-07-01 05:47:46.087 >>> d = Differ()
2025-07-01 05:47:46.093 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:46.099 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:46.105 >>> print(''.join(results), end="")
2025-07-01 05:47:46.111 - abcDefghiJkl
2025-07-01 05:47:46.122 + abcdefGhijkl
2025-07-01 05:47:46.135 """
2025-07-01 05:47:46.142
2025-07-01 05:47:46.154 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:46.161 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:46.168 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:46.176 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:46.183 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:46.190
2025-07-01 05:47:46.199 # search for the pair that matches best without being identical
2025-07-01 05:47:46.208 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:46.215 # on junk -- unless we have to)
2025-07-01 05:47:46.222 for j in range(blo, bhi):
2025-07-01 05:47:46.228 bj = b[j]
2025-07-01 05:47:46.234 cruncher.set_seq2(bj)
2025-07-01 05:47:46.243 for i in range(alo, ahi):
2025-07-01 05:47:46.252 ai = a[i]
2025-07-01 05:47:46.258 if ai == bj:
2025-07-01 05:47:46.265 if eqi is None:
2025-07-01 05:47:46.271 eqi, eqj = i, j
2025-07-01 05:47:46.277 continue
2025-07-01 05:47:46.283 cruncher.set_seq1(ai)
2025-07-01 05:47:46.291 # computing similarity is expensive, so use the quick
2025-07-01 05:47:46.299 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:46.311 # compares by a factor of 3.
2025-07-01 05:47:46.319 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:46.326 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:46.331 # of the computation is cached by cruncher
2025-07-01 05:47:46.336 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:46.341 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:46.347 cruncher.ratio() > best_ratio:
2025-07-01 05:47:46.354 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:46.360 if best_ratio < cutoff:
2025-07-01 05:47:46.367 # no non-identical "pretty close" pair
2025-07-01 05:47:46.374 if eqi is None:
2025-07-01 05:47:46.384 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:46.396 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:46.405 return
2025-07-01 05:47:46.412 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:46.418 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:46.429 else:
2025-07-01 05:47:46.439 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:46.446 eqi = None
2025-07-01 05:47:46.452
2025-07-01 05:47:46.458 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:46.468 # identical
2025-07-01 05:47:46.477
2025-07-01 05:47:46.489 # pump out diffs from before the synch point
2025-07-01 05:47:46.500 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:46.510
2025-07-01 05:47:46.520 # do intraline marking on the synch pair
2025-07-01 05:47:46.532 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:46.545 if eqi is None:
2025-07-01 05:47:46.551 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:46.557 atags = btags = ""
2025-07-01 05:47:46.564 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:46.569 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:46.578 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:46.587 if tag == 'replace':
2025-07-01 05:47:46.594 atags += '^' * la
2025-07-01 05:47:46.601 btags += '^' * lb
2025-07-01 05:47:46.608 elif tag == 'delete':
2025-07-01 05:47:46.621 atags += '-' * la
2025-07-01 05:47:46.631 elif tag == 'insert':
2025-07-01 05:47:46.639 btags += '+' * lb
2025-07-01 05:47:46.646 elif tag == 'equal':
2025-07-01 05:47:46.652 atags += ' ' * la
2025-07-01 05:47:46.659 btags += ' ' * lb
2025-07-01 05:47:46.669 else:
2025-07-01 05:47:46.677 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:46.684 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:46.689 else:
2025-07-01 05:47:46.694 # the synch pair is identical
2025-07-01 05:47:46.700 yield ' ' + aelt
2025-07-01 05:47:46.705
2025-07-01 05:47:46.710 # pump out diffs from after the synch point
2025-07-01 05:47:46.716 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:46.721
2025-07-01 05:47:46.734 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:46.743 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:46.751
2025-07-01 05:47:46.758 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:46.768 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:46.779 alo = 254, ahi = 1101
2025-07-01 05:47:46.787 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:46.794 blo = 254, bhi = 1101
2025-07-01 05:47:46.804
2025-07-01 05:47:46.811 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:46.817 g = []
2025-07-01 05:47:46.823 if alo < ahi:
2025-07-01 05:47:46.828 if blo < bhi:
2025-07-01 05:47:46.833 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:46.838 else:
2025-07-01 05:47:46.852 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:46.862 elif blo < bhi:
2025-07-01 05:47:46.871 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:46.878
2025-07-01 05:47:46.888 > yield from g
2025-07-01 05:47:46.894
2025-07-01 05:47:46.901 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:46.907 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:46.913
2025-07-01 05:47:46.921 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:46.931 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:46.938 alo = 254, ahi = 1101
2025-07-01 05:47:46.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:46.951 blo = 254, bhi = 1101
2025-07-01 05:47:46.964
2025-07-01 05:47:46.972 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:46.979 r"""
2025-07-01 05:47:46.987 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:46.999 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:47.007 synch point, and intraline difference marking is done on the
2025-07-01 05:47:47.021 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:47.034
2025-07-01 05:47:47.044 Example:
2025-07-01 05:47:47.053
2025-07-01 05:47:47.061 >>> d = Differ()
2025-07-01 05:47:47.067 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:47.072 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:47.077 >>> print(''.join(results), end="")
2025-07-01 05:47:47.085 - abcDefghiJkl
2025-07-01 05:47:47.110 + abcdefGhijkl
2025-07-01 05:47:47.132 """
2025-07-01 05:47:47.140
2025-07-01 05:47:47.147 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:47.153 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:47.158 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:47.162 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:47.168 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:47.173
2025-07-01 05:47:47.179 # search for the pair that matches best without being identical
2025-07-01 05:47:47.185 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:47.197 # on junk -- unless we have to)
2025-07-01 05:47:47.206 for j in range(blo, bhi):
2025-07-01 05:47:47.213 bj = b[j]
2025-07-01 05:47:47.221 cruncher.set_seq2(bj)
2025-07-01 05:47:47.232 for i in range(alo, ahi):
2025-07-01 05:47:47.239 ai = a[i]
2025-07-01 05:47:47.247 if ai == bj:
2025-07-01 05:47:47.259 if eqi is None:
2025-07-01 05:47:47.268 eqi, eqj = i, j
2025-07-01 05:47:47.274 continue
2025-07-01 05:47:47.280 cruncher.set_seq1(ai)
2025-07-01 05:47:47.287 # computing similarity is expensive, so use the quick
2025-07-01 05:47:47.293 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:47.299 # compares by a factor of 3.
2025-07-01 05:47:47.305 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:47.311 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:47.318 # of the computation is cached by cruncher
2025-07-01 05:47:47.326 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:47.339 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:47.347 cruncher.ratio() > best_ratio:
2025-07-01 05:47:47.354 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:47.362 if best_ratio < cutoff:
2025-07-01 05:47:47.369 # no non-identical "pretty close" pair
2025-07-01 05:47:47.375 if eqi is None:
2025-07-01 05:47:47.383 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:47.390 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:47.397 return
2025-07-01 05:47:47.404 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:47.410 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:47.417 else:
2025-07-01 05:47:47.423 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:47.431 eqi = None
2025-07-01 05:47:47.437
2025-07-01 05:47:47.444 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:47.450 # identical
2025-07-01 05:47:47.457
2025-07-01 05:47:47.464 # pump out diffs from before the synch point
2025-07-01 05:47:47.470 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:47.476
2025-07-01 05:47:47.482 # do intraline marking on the synch pair
2025-07-01 05:47:47.488 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:47.493 if eqi is None:
2025-07-01 05:47:47.500 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:47.506 atags = btags = ""
2025-07-01 05:47:47.512 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:47.519 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:47.524 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:47.531 if tag == 'replace':
2025-07-01 05:47:47.538 atags += '^' * la
2025-07-01 05:47:47.549 btags += '^' * lb
2025-07-01 05:47:47.555 elif tag == 'delete':
2025-07-01 05:47:47.563 atags += '-' * la
2025-07-01 05:47:47.571 elif tag == 'insert':
2025-07-01 05:47:47.580 btags += '+' * lb
2025-07-01 05:47:47.589 elif tag == 'equal':
2025-07-01 05:47:47.596 atags += ' ' * la
2025-07-01 05:47:47.603 btags += ' ' * lb
2025-07-01 05:47:47.610 else:
2025-07-01 05:47:47.622 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:47.631 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:47.639 else:
2025-07-01 05:47:47.645 # the synch pair is identical
2025-07-01 05:47:47.655 yield ' ' + aelt
2025-07-01 05:47:47.667
2025-07-01 05:47:47.677 # pump out diffs from after the synch point
2025-07-01 05:47:47.691 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:47.703
2025-07-01 05:47:47.713 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:47.726 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:47.737
2025-07-01 05:47:47.746 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:47.756 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:47.767 alo = 255, ahi = 1101
2025-07-01 05:47:47.776 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:47.783 blo = 255, bhi = 1101
2025-07-01 05:47:47.788
2025-07-01 05:47:47.794 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:47.800 g = []
2025-07-01 05:47:47.806 if alo < ahi:
2025-07-01 05:47:47.818 if blo < bhi:
2025-07-01 05:47:47.826 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:47.832 else:
2025-07-01 05:47:47.838 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:47.848 elif blo < bhi:
2025-07-01 05:47:47.856 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:47.863
2025-07-01 05:47:47.870 > yield from g
2025-07-01 05:47:47.879
2025-07-01 05:47:47.887 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:47.899 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:47.909
2025-07-01 05:47:47.917 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:47.923 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:47.928 alo = 255, ahi = 1101
2025-07-01 05:47:47.935 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:47.941 blo = 255, bhi = 1101
2025-07-01 05:47:47.946
2025-07-01 05:47:47.955 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:47.965 r"""
2025-07-01 05:47:47.974 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:47.982 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:47.990 synch point, and intraline difference marking is done on the
2025-07-01 05:47:48.000 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:48.011
2025-07-01 05:47:48.022 Example:
2025-07-01 05:47:48.032
2025-07-01 05:47:48.040 >>> d = Differ()
2025-07-01 05:47:48.046 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:48.053 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:48.059 >>> print(''.join(results), end="")
2025-07-01 05:47:48.064 - abcDefghiJkl
2025-07-01 05:47:48.075 + abcdefGhijkl
2025-07-01 05:47:48.087 """
2025-07-01 05:47:48.097
2025-07-01 05:47:48.106 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:48.115 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:48.123 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:48.130 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:48.136 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:48.142
2025-07-01 05:47:48.152 # search for the pair that matches best without being identical
2025-07-01 05:47:48.161 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:48.169 # on junk -- unless we have to)
2025-07-01 05:47:48.175 for j in range(blo, bhi):
2025-07-01 05:47:48.182 bj = b[j]
2025-07-01 05:47:48.188 cruncher.set_seq2(bj)
2025-07-01 05:47:48.195 for i in range(alo, ahi):
2025-07-01 05:47:48.205 ai = a[i]
2025-07-01 05:47:48.213 if ai == bj:
2025-07-01 05:47:48.219 if eqi is None:
2025-07-01 05:47:48.226 eqi, eqj = i, j
2025-07-01 05:47:48.231 continue
2025-07-01 05:47:48.239 cruncher.set_seq1(ai)
2025-07-01 05:47:48.249 # computing similarity is expensive, so use the quick
2025-07-01 05:47:48.258 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:48.265 # compares by a factor of 3.
2025-07-01 05:47:48.271 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:48.277 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:48.283 # of the computation is cached by cruncher
2025-07-01 05:47:48.289 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:48.295 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:48.302 cruncher.ratio() > best_ratio:
2025-07-01 05:47:48.314 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:48.325 if best_ratio < cutoff:
2025-07-01 05:47:48.333 # no non-identical "pretty close" pair
2025-07-01 05:47:48.344 if eqi is None:
2025-07-01 05:47:48.353 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:48.367 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:48.378 return
2025-07-01 05:47:48.388 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:48.401 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:48.412 else:
2025-07-01 05:47:48.423 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:48.436 eqi = None
2025-07-01 05:47:48.444
2025-07-01 05:47:48.457 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:48.468 # identical
2025-07-01 05:47:48.477
2025-07-01 05:47:48.484 # pump out diffs from before the synch point
2025-07-01 05:47:48.491 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:48.497
2025-07-01 05:47:48.502 # do intraline marking on the synch pair
2025-07-01 05:47:48.513 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:48.523 if eqi is None:
2025-07-01 05:47:48.531 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:48.539 atags = btags = ""
2025-07-01 05:47:48.547 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:48.559 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:48.569 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:48.581 if tag == 'replace':
2025-07-01 05:47:48.591 atags += '^' * la
2025-07-01 05:47:48.599 btags += '^' * lb
2025-07-01 05:47:48.611 elif tag == 'delete':
2025-07-01 05:47:48.623 atags += '-' * la
2025-07-01 05:47:48.633 elif tag == 'insert':
2025-07-01 05:47:48.641 btags += '+' * lb
2025-07-01 05:47:48.648 elif tag == 'equal':
2025-07-01 05:47:48.655 atags += ' ' * la
2025-07-01 05:47:48.663 btags += ' ' * lb
2025-07-01 05:47:48.673 else:
2025-07-01 05:47:48.681 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:48.693 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:48.704 else:
2025-07-01 05:47:48.716 # the synch pair is identical
2025-07-01 05:47:48.724 yield ' ' + aelt
2025-07-01 05:47:48.731
2025-07-01 05:47:48.739 # pump out diffs from after the synch point
2025-07-01 05:47:48.752 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:48.765
2025-07-01 05:47:48.774 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:48.781 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:48.787
2025-07-01 05:47:48.794 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:48.799 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:48.804 alo = 256, ahi = 1101
2025-07-01 05:47:48.809 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:48.814 blo = 256, bhi = 1101
2025-07-01 05:47:48.819
2025-07-01 05:47:48.824 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:48.829 g = []
2025-07-01 05:47:48.835 if alo < ahi:
2025-07-01 05:47:48.840 if blo < bhi:
2025-07-01 05:47:48.846 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:48.851 else:
2025-07-01 05:47:48.859 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:48.869 elif blo < bhi:
2025-07-01 05:47:48.877 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:48.884
2025-07-01 05:47:48.895 > yield from g
2025-07-01 05:47:48.906
2025-07-01 05:47:48.916 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:48.925 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:48.933
2025-07-01 05:47:48.940 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:48.945 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:48.951 alo = 256, ahi = 1101
2025-07-01 05:47:48.958 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:48.963 blo = 256, bhi = 1101
2025-07-01 05:47:48.968
2025-07-01 05:47:48.973 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:48.977 r"""
2025-07-01 05:47:48.982 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:48.989 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:48.995 synch point, and intraline difference marking is done on the
2025-07-01 05:47:49.001 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:49.007
2025-07-01 05:47:49.012 Example:
2025-07-01 05:47:49.018
2025-07-01 05:47:49.028 >>> d = Differ()
2025-07-01 05:47:49.037 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:49.044 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:49.051 >>> print(''.join(results), end="")
2025-07-01 05:47:49.057 - abcDefghiJkl
2025-07-01 05:47:49.072 + abcdefGhijkl
2025-07-01 05:47:49.094 """
2025-07-01 05:47:49.104
2025-07-01 05:47:49.115 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:49.124 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:49.131 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:49.138 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:49.146 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:49.151
2025-07-01 05:47:49.157 # search for the pair that matches best without being identical
2025-07-01 05:47:49.163 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:49.169 # on junk -- unless we have to)
2025-07-01 05:47:49.175 for j in range(blo, bhi):
2025-07-01 05:47:49.186 bj = b[j]
2025-07-01 05:47:49.199 cruncher.set_seq2(bj)
2025-07-01 05:47:49.210 for i in range(alo, ahi):
2025-07-01 05:47:49.219 ai = a[i]
2025-07-01 05:47:49.228 if ai == bj:
2025-07-01 05:47:49.235 if eqi is None:
2025-07-01 05:47:49.241 eqi, eqj = i, j
2025-07-01 05:47:49.247 continue
2025-07-01 05:47:49.260 cruncher.set_seq1(ai)
2025-07-01 05:47:49.269 # computing similarity is expensive, so use the quick
2025-07-01 05:47:49.282 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:49.292 # compares by a factor of 3.
2025-07-01 05:47:49.300 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:49.313 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:49.323 # of the computation is cached by cruncher
2025-07-01 05:47:49.332 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:49.340 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:49.349 cruncher.ratio() > best_ratio:
2025-07-01 05:47:49.356 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:49.362 if best_ratio < cutoff:
2025-07-01 05:47:49.368 # no non-identical "pretty close" pair
2025-07-01 05:47:49.375 if eqi is None:
2025-07-01 05:47:49.386 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:49.393 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:49.402 return
2025-07-01 05:47:49.414 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:49.424 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:49.435 else:
2025-07-01 05:47:49.443 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:49.452 eqi = None
2025-07-01 05:47:49.458
2025-07-01 05:47:49.467 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:49.474 # identical
2025-07-01 05:47:49.484
2025-07-01 05:47:49.493 # pump out diffs from before the synch point
2025-07-01 05:47:49.505 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:49.515
2025-07-01 05:47:49.527 # do intraline marking on the synch pair
2025-07-01 05:47:49.538 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:49.547 if eqi is None:
2025-07-01 05:47:49.555 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:49.562 atags = btags = ""
2025-07-01 05:47:49.569 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:49.575 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:49.580 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:49.586 if tag == 'replace':
2025-07-01 05:47:49.597 atags += '^' * la
2025-07-01 05:47:49.607 btags += '^' * lb
2025-07-01 05:47:49.619 elif tag == 'delete':
2025-07-01 05:47:49.628 atags += '-' * la
2025-07-01 05:47:49.636 elif tag == 'insert':
2025-07-01 05:47:49.642 btags += '+' * lb
2025-07-01 05:47:49.649 elif tag == 'equal':
2025-07-01 05:47:49.654 atags += ' ' * la
2025-07-01 05:47:49.660 btags += ' ' * lb
2025-07-01 05:47:49.667 else:
2025-07-01 05:47:49.677 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:49.686 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:49.695 else:
2025-07-01 05:47:49.706 # the synch pair is identical
2025-07-01 05:47:49.718 yield ' ' + aelt
2025-07-01 05:47:49.728
2025-07-01 05:47:49.737 # pump out diffs from after the synch point
2025-07-01 05:47:49.744 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:49.750
2025-07-01 05:47:49.762 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:49.770 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:49.776
2025-07-01 05:47:49.787 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:49.795 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:49.802 alo = 257, ahi = 1101
2025-07-01 05:47:49.810 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:49.821 blo = 257, bhi = 1101
2025-07-01 05:47:49.828
2025-07-01 05:47:49.835 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:49.840 g = []
2025-07-01 05:47:49.847 if alo < ahi:
2025-07-01 05:47:49.856 if blo < bhi:
2025-07-01 05:47:49.865 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:49.873 else:
2025-07-01 05:47:49.883 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:49.893 elif blo < bhi:
2025-07-01 05:47:49.902 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:49.909
2025-07-01 05:47:49.920 > yield from g
2025-07-01 05:47:49.929
2025-07-01 05:47:49.936 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:49.944 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:49.950
2025-07-01 05:47:49.959 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:49.967 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:49.972 alo = 257, ahi = 1101
2025-07-01 05:47:49.980 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:49.987 blo = 257, bhi = 1101
2025-07-01 05:47:49.994
2025-07-01 05:47:49.999 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:50.006 r"""
2025-07-01 05:47:50.015 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:50.022 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:50.031 synch point, and intraline difference marking is done on the
2025-07-01 05:47:50.043 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:50.054
2025-07-01 05:47:50.063 Example:
2025-07-01 05:47:50.070
2025-07-01 05:47:50.078 >>> d = Differ()
2025-07-01 05:47:50.085 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:50.095 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:50.106 >>> print(''.join(results), end="")
2025-07-01 05:47:50.115 - abcDefghiJkl
2025-07-01 05:47:50.132 + abcdefGhijkl
2025-07-01 05:47:50.146 """
2025-07-01 05:47:50.152
2025-07-01 05:47:50.158 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:50.165 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:50.174 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:50.186 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:50.197 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:50.208
2025-07-01 05:47:50.217 # search for the pair that matches best without being identical
2025-07-01 05:47:50.224 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:50.231 # on junk -- unless we have to)
2025-07-01 05:47:50.237 for j in range(blo, bhi):
2025-07-01 05:47:50.243 bj = b[j]
2025-07-01 05:47:50.251 cruncher.set_seq2(bj)
2025-07-01 05:47:50.257 for i in range(alo, ahi):
2025-07-01 05:47:50.264 ai = a[i]
2025-07-01 05:47:50.271 if ai == bj:
2025-07-01 05:47:50.278 if eqi is None:
2025-07-01 05:47:50.287 eqi, eqj = i, j
2025-07-01 05:47:50.296 continue
2025-07-01 05:47:50.302 cruncher.set_seq1(ai)
2025-07-01 05:47:50.308 # computing similarity is expensive, so use the quick
2025-07-01 05:47:50.315 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:50.327 # compares by a factor of 3.
2025-07-01 05:47:50.335 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:50.344 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:50.351 # of the computation is cached by cruncher
2025-07-01 05:47:50.358 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:50.364 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:50.370 cruncher.ratio() > best_ratio:
2025-07-01 05:47:50.383 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:50.392 if best_ratio < cutoff:
2025-07-01 05:47:50.401 # no non-identical "pretty close" pair
2025-07-01 05:47:50.409 if eqi is None:
2025-07-01 05:47:50.416 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:50.422 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:50.428 return
2025-07-01 05:47:50.434 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:50.444 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:50.456 else:
2025-07-01 05:47:50.468 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:50.477 eqi = None
2025-07-01 05:47:50.485
2025-07-01 05:47:50.492 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:50.498 # identical
2025-07-01 05:47:50.504
2025-07-01 05:47:50.509 # pump out diffs from before the synch point
2025-07-01 05:47:50.515 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:50.520
2025-07-01 05:47:50.526 # do intraline marking on the synch pair
2025-07-01 05:47:50.536 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:50.546 if eqi is None:
2025-07-01 05:47:50.555 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:50.563 atags = btags = ""
2025-07-01 05:47:50.571 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:50.582 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:50.592 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:50.601 if tag == 'replace':
2025-07-01 05:47:50.613 atags += '^' * la
2025-07-01 05:47:50.620 btags += '^' * lb
2025-07-01 05:47:50.626 elif tag == 'delete':
2025-07-01 05:47:50.631 atags += '-' * la
2025-07-01 05:47:50.639 elif tag == 'insert':
2025-07-01 05:47:50.650 btags += '+' * lb
2025-07-01 05:47:50.659 elif tag == 'equal':
2025-07-01 05:47:50.669 atags += ' ' * la
2025-07-01 05:47:50.679 btags += ' ' * lb
2025-07-01 05:47:50.687 else:
2025-07-01 05:47:50.694 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:50.703 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:50.714 else:
2025-07-01 05:47:50.722 # the synch pair is identical
2025-07-01 05:47:50.730 yield ' ' + aelt
2025-07-01 05:47:50.739
2025-07-01 05:47:50.746 # pump out diffs from after the synch point
2025-07-01 05:47:50.753 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:50.759
2025-07-01 05:47:50.764 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:50.770 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:50.777
2025-07-01 05:47:50.784 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:50.797 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:50.809 alo = 258, ahi = 1101
2025-07-01 05:47:50.819 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:50.827 blo = 258, bhi = 1101
2025-07-01 05:47:50.835
2025-07-01 05:47:50.846 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:50.857 g = []
2025-07-01 05:47:50.869 if alo < ahi:
2025-07-01 05:47:50.880 if blo < bhi:
2025-07-01 05:47:50.889 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:50.896 else:
2025-07-01 05:47:50.903 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:50.909 elif blo < bhi:
2025-07-01 05:47:50.917 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:50.922
2025-07-01 05:47:50.928 > yield from g
2025-07-01 05:47:50.934
2025-07-01 05:47:50.940 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:50.946 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:50.951
2025-07-01 05:47:50.960 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:50.973 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:50.983 alo = 258, ahi = 1101
2025-07-01 05:47:50.998 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:51.007 blo = 258, bhi = 1101
2025-07-01 05:47:51.014
2025-07-01 05:47:51.025 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:51.033 r"""
2025-07-01 05:47:51.041 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:51.048 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:51.056 synch point, and intraline difference marking is done on the
2025-07-01 05:47:51.064 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:51.070
2025-07-01 05:47:51.079 Example:
2025-07-01 05:47:51.089
2025-07-01 05:47:51.097 >>> d = Differ()
2025-07-01 05:47:51.103 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:51.110 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:51.116 >>> print(''.join(results), end="")
2025-07-01 05:47:51.121 - abcDefghiJkl
2025-07-01 05:47:51.130 + abcdefGhijkl
2025-07-01 05:47:51.141 """
2025-07-01 05:47:51.147
2025-07-01 05:47:51.153 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:51.163 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:51.171 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:51.181 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:51.189 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:51.196
2025-07-01 05:47:51.202 # search for the pair that matches best without being identical
2025-07-01 05:47:51.207 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:51.212 # on junk -- unless we have to)
2025-07-01 05:47:51.217 for j in range(blo, bhi):
2025-07-01 05:47:51.223 bj = b[j]
2025-07-01 05:47:51.228 cruncher.set_seq2(bj)
2025-07-01 05:47:51.233 for i in range(alo, ahi):
2025-07-01 05:47:51.238 ai = a[i]
2025-07-01 05:47:51.243 if ai == bj:
2025-07-01 05:47:51.248 if eqi is None:
2025-07-01 05:47:51.254 eqi, eqj = i, j
2025-07-01 05:47:51.263 continue
2025-07-01 05:47:51.273 cruncher.set_seq1(ai)
2025-07-01 05:47:51.285 # computing similarity is expensive, so use the quick
2025-07-01 05:47:51.298 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:51.308 # compares by a factor of 3.
2025-07-01 05:47:51.316 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:51.323 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:51.329 # of the computation is cached by cruncher
2025-07-01 05:47:51.335 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:51.341 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:51.347 cruncher.ratio() > best_ratio:
2025-07-01 05:47:51.355 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:51.366 if best_ratio < cutoff:
2025-07-01 05:47:51.379 # no non-identical "pretty close" pair
2025-07-01 05:47:51.389 if eqi is None:
2025-07-01 05:47:51.397 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:51.403 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:51.409 return
2025-07-01 05:47:51.415 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:51.421 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:51.426 else:
2025-07-01 05:47:51.432 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:51.438 eqi = None
2025-07-01 05:47:51.445
2025-07-01 05:47:51.453 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:51.459 # identical
2025-07-01 05:47:51.464
2025-07-01 05:47:51.469 # pump out diffs from before the synch point
2025-07-01 05:47:51.474 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:51.479
2025-07-01 05:47:51.484 # do intraline marking on the synch pair
2025-07-01 05:47:51.490 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:51.495 if eqi is None:
2025-07-01 05:47:51.503 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:51.514 atags = btags = ""
2025-07-01 05:47:51.520 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:51.527 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:51.532 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:51.539 if tag == 'replace':
2025-07-01 05:47:51.546 atags += '^' * la
2025-07-01 05:47:51.554 btags += '^' * lb
2025-07-01 05:47:51.560 elif tag == 'delete':
2025-07-01 05:47:51.566 atags += '-' * la
2025-07-01 05:47:51.572 elif tag == 'insert':
2025-07-01 05:47:51.583 btags += '+' * lb
2025-07-01 05:47:51.593 elif tag == 'equal':
2025-07-01 05:47:51.600 atags += ' ' * la
2025-07-01 05:47:51.608 btags += ' ' * lb
2025-07-01 05:47:51.615 else:
2025-07-01 05:47:51.621 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:51.627 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:51.632 else:
2025-07-01 05:47:51.638 # the synch pair is identical
2025-07-01 05:47:51.644 yield ' ' + aelt
2025-07-01 05:47:51.649
2025-07-01 05:47:51.655 # pump out diffs from after the synch point
2025-07-01 05:47:51.660 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:51.665
2025-07-01 05:47:51.670 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:51.677 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:51.684
2025-07-01 05:47:51.691 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:51.698 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:51.710 alo = 259, ahi = 1101
2025-07-01 05:47:51.720 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:51.727 blo = 259, bhi = 1101
2025-07-01 05:47:51.733
2025-07-01 05:47:51.738 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:51.745 g = []
2025-07-01 05:47:51.752 if alo < ahi:
2025-07-01 05:47:51.758 if blo < bhi:
2025-07-01 05:47:51.765 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:51.772 else:
2025-07-01 05:47:51.779 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:51.787 elif blo < bhi:
2025-07-01 05:47:51.794 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:51.805
2025-07-01 05:47:51.815 > yield from g
2025-07-01 05:47:51.822
2025-07-01 05:47:51.828 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:51.834 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:51.841
2025-07-01 05:47:51.847 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:51.856 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:51.863 alo = 259, ahi = 1101
2025-07-01 05:47:51.871 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:51.878 blo = 259, bhi = 1101
2025-07-01 05:47:51.887
2025-07-01 05:47:51.897 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:51.905 r"""
2025-07-01 05:47:51.911 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:51.917 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:51.921 synch point, and intraline difference marking is done on the
2025-07-01 05:47:51.926 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:51.931
2025-07-01 05:47:51.935 Example:
2025-07-01 05:47:51.940
2025-07-01 05:47:51.946 >>> d = Differ()
2025-07-01 05:47:51.952 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:51.958 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:51.965 >>> print(''.join(results), end="")
2025-07-01 05:47:51.971 - abcDefghiJkl
2025-07-01 05:47:51.990 + abcdefGhijkl
2025-07-01 05:47:52.005 """
2025-07-01 05:47:52.011
2025-07-01 05:47:52.018 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:52.030 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:52.037 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:52.044 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:52.049 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:52.055
2025-07-01 05:47:52.060 # search for the pair that matches best without being identical
2025-07-01 05:47:52.065 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:52.070 # on junk -- unless we have to)
2025-07-01 05:47:52.075 for j in range(blo, bhi):
2025-07-01 05:47:52.080 bj = b[j]
2025-07-01 05:47:52.085 cruncher.set_seq2(bj)
2025-07-01 05:47:52.091 for i in range(alo, ahi):
2025-07-01 05:47:52.097 ai = a[i]
2025-07-01 05:47:52.102 if ai == bj:
2025-07-01 05:47:52.109 if eqi is None:
2025-07-01 05:47:52.115 eqi, eqj = i, j
2025-07-01 05:47:52.121 continue
2025-07-01 05:47:52.126 cruncher.set_seq1(ai)
2025-07-01 05:47:52.133 # computing similarity is expensive, so use the quick
2025-07-01 05:47:52.138 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:52.144 # compares by a factor of 3.
2025-07-01 05:47:52.150 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:52.160 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:52.170 # of the computation is cached by cruncher
2025-07-01 05:47:52.178 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:52.184 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:52.191 cruncher.ratio() > best_ratio:
2025-07-01 05:47:52.200 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:52.207 if best_ratio < cutoff:
2025-07-01 05:47:52.215 # no non-identical "pretty close" pair
2025-07-01 05:47:52.221 if eqi is None:
2025-07-01 05:47:52.234 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:52.246 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:52.253 return
2025-07-01 05:47:52.261 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:52.267 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:52.273 else:
2025-07-01 05:47:52.283 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:52.292 eqi = None
2025-07-01 05:47:52.299
2025-07-01 05:47:52.307 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:52.318 # identical
2025-07-01 05:47:52.330
2025-07-01 05:47:52.336 # pump out diffs from before the synch point
2025-07-01 05:47:52.344 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:52.350
2025-07-01 05:47:52.363 # do intraline marking on the synch pair
2025-07-01 05:47:52.374 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:52.383 if eqi is None:
2025-07-01 05:47:52.390 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:52.395 atags = btags = ""
2025-07-01 05:47:52.400 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:52.406 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:52.413 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:52.421 if tag == 'replace':
2025-07-01 05:47:52.427 atags += '^' * la
2025-07-01 05:47:52.435 btags += '^' * lb
2025-07-01 05:47:52.443 elif tag == 'delete':
2025-07-01 05:47:52.449 atags += '-' * la
2025-07-01 05:47:52.455 elif tag == 'insert':
2025-07-01 05:47:52.461 btags += '+' * lb
2025-07-01 05:47:52.466 elif tag == 'equal':
2025-07-01 05:47:52.475 atags += ' ' * la
2025-07-01 05:47:52.485 btags += ' ' * lb
2025-07-01 05:47:52.493 else:
2025-07-01 05:47:52.501 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:52.507 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:52.513 else:
2025-07-01 05:47:52.519 # the synch pair is identical
2025-07-01 05:47:52.526 yield ' ' + aelt
2025-07-01 05:47:52.536
2025-07-01 05:47:52.544 # pump out diffs from after the synch point
2025-07-01 05:47:52.552 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:52.558
2025-07-01 05:47:52.565 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:52.575 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:52.584
2025-07-01 05:47:52.592 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:52.600 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:52.606 alo = 260, ahi = 1101
2025-07-01 05:47:52.613 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:52.625 blo = 260, bhi = 1101
2025-07-01 05:47:52.634
2025-07-01 05:47:52.641 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:52.647 g = []
2025-07-01 05:47:52.659 if alo < ahi:
2025-07-01 05:47:52.668 if blo < bhi:
2025-07-01 05:47:52.675 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:52.683 else:
2025-07-01 05:47:52.694 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:52.703 elif blo < bhi:
2025-07-01 05:47:52.713 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:52.725
2025-07-01 05:47:52.734 > yield from g
2025-07-01 05:47:52.742
2025-07-01 05:47:52.750 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:52.761 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:52.771
2025-07-01 05:47:52.779 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:52.787 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:52.793 alo = 260, ahi = 1101
2025-07-01 05:47:52.805 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:52.816 blo = 260, bhi = 1101
2025-07-01 05:47:52.828
2025-07-01 05:47:52.837 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:52.848 r"""
2025-07-01 05:47:52.859 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:52.873 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:52.886 synch point, and intraline difference marking is done on the
2025-07-01 05:47:52.897 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:52.905
2025-07-01 05:47:52.913 Example:
2025-07-01 05:47:52.921
2025-07-01 05:47:52.928 >>> d = Differ()
2025-07-01 05:47:52.935 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:52.943 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:52.954 >>> print(''.join(results), end="")
2025-07-01 05:47:52.963 - abcDefghiJkl
2025-07-01 05:47:52.979 + abcdefGhijkl
2025-07-01 05:47:52.990 """
2025-07-01 05:47:53.000
2025-07-01 05:47:53.011 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:53.020 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:53.027 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:53.034 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:53.044 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:53.053
2025-07-01 05:47:53.065 # search for the pair that matches best without being identical
2025-07-01 05:47:53.074 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:53.082 # on junk -- unless we have to)
2025-07-01 05:47:53.088 for j in range(blo, bhi):
2025-07-01 05:47:53.095 bj = b[j]
2025-07-01 05:47:53.103 cruncher.set_seq2(bj)
2025-07-01 05:47:53.113 for i in range(alo, ahi):
2025-07-01 05:47:53.121 ai = a[i]
2025-07-01 05:47:53.126 if ai == bj:
2025-07-01 05:47:53.132 if eqi is None:
2025-07-01 05:47:53.139 eqi, eqj = i, j
2025-07-01 05:47:53.151 continue
2025-07-01 05:47:53.159 cruncher.set_seq1(ai)
2025-07-01 05:47:53.166 # computing similarity is expensive, so use the quick
2025-07-01 05:47:53.173 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:53.180 # compares by a factor of 3.
2025-07-01 05:47:53.187 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:53.198 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:53.210 # of the computation is cached by cruncher
2025-07-01 05:47:53.220 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:53.228 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:53.235 cruncher.ratio() > best_ratio:
2025-07-01 05:47:53.240 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:53.246 if best_ratio < cutoff:
2025-07-01 05:47:53.252 # no non-identical "pretty close" pair
2025-07-01 05:47:53.259 if eqi is None:
2025-07-01 05:47:53.266 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:53.275 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:53.282 return
2025-07-01 05:47:53.289 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:53.296 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:53.303 else:
2025-07-01 05:47:53.310 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:53.321 eqi = None
2025-07-01 05:47:53.329
2025-07-01 05:47:53.336 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:53.342 # identical
2025-07-01 05:47:53.347
2025-07-01 05:47:53.353 # pump out diffs from before the synch point
2025-07-01 05:47:53.359 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:53.365
2025-07-01 05:47:53.371 # do intraline marking on the synch pair
2025-07-01 05:47:53.378 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:53.386 if eqi is None:
2025-07-01 05:47:53.394 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:53.401 atags = btags = ""
2025-07-01 05:47:53.413 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:53.421 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:53.428 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:53.434 if tag == 'replace':
2025-07-01 05:47:53.440 atags += '^' * la
2025-07-01 05:47:53.446 btags += '^' * lb
2025-07-01 05:47:53.451 elif tag == 'delete':
2025-07-01 05:47:53.456 atags += '-' * la
2025-07-01 05:47:53.461 elif tag == 'insert':
2025-07-01 05:47:53.466 btags += '+' * lb
2025-07-01 05:47:53.472 elif tag == 'equal':
2025-07-01 05:47:53.478 atags += ' ' * la
2025-07-01 05:47:53.483 btags += ' ' * lb
2025-07-01 05:47:53.489 else:
2025-07-01 05:47:53.497 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:53.505 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:53.511 else:
2025-07-01 05:47:53.517 # the synch pair is identical
2025-07-01 05:47:53.521 yield ' ' + aelt
2025-07-01 05:47:53.526
2025-07-01 05:47:53.531 # pump out diffs from after the synch point
2025-07-01 05:47:53.536 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:53.542
2025-07-01 05:47:53.548 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:53.554 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:53.560
2025-07-01 05:47:53.566 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:53.575 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:53.582 alo = 261, ahi = 1101
2025-07-01 05:47:53.590 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:53.596 blo = 261, bhi = 1101
2025-07-01 05:47:53.603
2025-07-01 05:47:53.609 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:53.616 g = []
2025-07-01 05:47:53.623 if alo < ahi:
2025-07-01 05:47:53.631 if blo < bhi:
2025-07-01 05:47:53.641 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:53.654 else:
2025-07-01 05:47:53.660 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:53.667 elif blo < bhi:
2025-07-01 05:47:53.679 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:53.690
2025-07-01 05:47:53.699 > yield from g
2025-07-01 05:47:53.707
2025-07-01 05:47:53.714 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:53.723 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:53.731
2025-07-01 05:47:53.742 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:53.750 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:53.756 alo = 261, ahi = 1101
2025-07-01 05:47:53.762 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:53.769 blo = 261, bhi = 1101
2025-07-01 05:47:53.777
2025-07-01 05:47:53.785 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:53.791 r"""
2025-07-01 05:47:53.797 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:53.803 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:53.809 synch point, and intraline difference marking is done on the
2025-07-01 05:47:53.814 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:53.820
2025-07-01 05:47:53.826 Example:
2025-07-01 05:47:53.837
2025-07-01 05:47:53.845 >>> d = Differ()
2025-07-01 05:47:53.852 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:53.859 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:53.866 >>> print(''.join(results), end="")
2025-07-01 05:47:53.878 - abcDefghiJkl
2025-07-01 05:47:53.899 + abcdefGhijkl
2025-07-01 05:47:53.911 """
2025-07-01 05:47:53.918
2025-07-01 05:47:53.925 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:53.933 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:53.939 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:53.951 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:53.960 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:53.966
2025-07-01 05:47:53.972 # search for the pair that matches best without being identical
2025-07-01 05:47:53.978 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:53.990 # on junk -- unless we have to)
2025-07-01 05:47:53.999 for j in range(blo, bhi):
2025-07-01 05:47:54.011 bj = b[j]
2025-07-01 05:47:54.020 cruncher.set_seq2(bj)
2025-07-01 05:47:54.032 for i in range(alo, ahi):
2025-07-01 05:47:54.044 ai = a[i]
2025-07-01 05:47:54.055 if ai == bj:
2025-07-01 05:47:54.064 if eqi is None:
2025-07-01 05:47:54.075 eqi, eqj = i, j
2025-07-01 05:47:54.085 continue
2025-07-01 05:47:54.100 cruncher.set_seq1(ai)
2025-07-01 05:47:54.109 # computing similarity is expensive, so use the quick
2025-07-01 05:47:54.116 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:54.123 # compares by a factor of 3.
2025-07-01 05:47:54.131 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:54.142 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:54.152 # of the computation is cached by cruncher
2025-07-01 05:47:54.161 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:54.169 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:54.180 cruncher.ratio() > best_ratio:
2025-07-01 05:47:54.189 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:54.196 if best_ratio < cutoff:
2025-07-01 05:47:54.203 # no non-identical "pretty close" pair
2025-07-01 05:47:54.208 if eqi is None:
2025-07-01 05:47:54.214 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:54.220 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:54.226 return
2025-07-01 05:47:54.235 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:54.243 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:54.251 else:
2025-07-01 05:47:54.260 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:54.265 eqi = None
2025-07-01 05:47:54.271
2025-07-01 05:47:54.277 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:54.283 # identical
2025-07-01 05:47:54.290
2025-07-01 05:47:54.301 # pump out diffs from before the synch point
2025-07-01 05:47:54.310 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:54.317
2025-07-01 05:47:54.324 # do intraline marking on the synch pair
2025-07-01 05:47:54.334 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:54.340 if eqi is None:
2025-07-01 05:47:54.347 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:54.357 atags = btags = ""
2025-07-01 05:47:54.368 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:54.376 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:54.384 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:54.392 if tag == 'replace':
2025-07-01 05:47:54.399 atags += '^' * la
2025-07-01 05:47:54.407 btags += '^' * lb
2025-07-01 05:47:54.416 elif tag == 'delete':
2025-07-01 05:47:54.423 atags += '-' * la
2025-07-01 05:47:54.430 elif tag == 'insert':
2025-07-01 05:47:54.437 btags += '+' * lb
2025-07-01 05:47:54.445 elif tag == 'equal':
2025-07-01 05:47:54.452 atags += ' ' * la
2025-07-01 05:47:54.460 btags += ' ' * lb
2025-07-01 05:47:54.466 else:
2025-07-01 05:47:54.473 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:54.479 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:54.485 else:
2025-07-01 05:47:54.491 # the synch pair is identical
2025-07-01 05:47:54.498 yield ' ' + aelt
2025-07-01 05:47:54.503
2025-07-01 05:47:54.511 # pump out diffs from after the synch point
2025-07-01 05:47:54.523 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:54.529
2025-07-01 05:47:54.536 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:54.542 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:54.548
2025-07-01 05:47:54.555 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:54.564 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:54.571 alo = 262, ahi = 1101
2025-07-01 05:47:54.580 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:54.587 blo = 262, bhi = 1101
2025-07-01 05:47:54.594
2025-07-01 05:47:54.605 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:54.614 g = []
2025-07-01 05:47:54.622 if alo < ahi:
2025-07-01 05:47:54.628 if blo < bhi:
2025-07-01 05:47:54.633 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:54.638 else:
2025-07-01 05:47:54.643 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:54.649 elif blo < bhi:
2025-07-01 05:47:54.655 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:54.662
2025-07-01 05:47:54.668 > yield from g
2025-07-01 05:47:54.675
2025-07-01 05:47:54.685 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:54.694 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:54.701
2025-07-01 05:47:54.706 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:54.712 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:54.717 alo = 262, ahi = 1101
2025-07-01 05:47:54.722 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:54.726 blo = 262, bhi = 1101
2025-07-01 05:47:54.731
2025-07-01 05:47:54.736 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:54.740 r"""
2025-07-01 05:47:54.745 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:54.750 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:54.755 synch point, and intraline difference marking is done on the
2025-07-01 05:47:54.762 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:54.768
2025-07-01 05:47:54.775 Example:
2025-07-01 05:47:54.786
2025-07-01 05:47:54.794 >>> d = Differ()
2025-07-01 05:47:54.800 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:54.807 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:54.816 >>> print(''.join(results), end="")
2025-07-01 05:47:54.826 - abcDefghiJkl
2025-07-01 05:47:54.842 + abcdefGhijkl
2025-07-01 05:47:54.853 """
2025-07-01 05:47:54.863
2025-07-01 05:47:54.874 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:54.883 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:54.893 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:54.902 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:54.909 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:54.916
2025-07-01 05:47:54.922 # search for the pair that matches best without being identical
2025-07-01 05:47:54.929 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:54.934 # on junk -- unless we have to)
2025-07-01 05:47:54.940 for j in range(blo, bhi):
2025-07-01 05:47:54.946 bj = b[j]
2025-07-01 05:47:54.951 cruncher.set_seq2(bj)
2025-07-01 05:47:54.956 for i in range(alo, ahi):
2025-07-01 05:47:54.961 ai = a[i]
2025-07-01 05:47:54.966 if ai == bj:
2025-07-01 05:47:54.973 if eqi is None:
2025-07-01 05:47:54.978 eqi, eqj = i, j
2025-07-01 05:47:54.983 continue
2025-07-01 05:47:54.990 cruncher.set_seq1(ai)
2025-07-01 05:47:54.995 # computing similarity is expensive, so use the quick
2025-07-01 05:47:55.002 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:55.009 # compares by a factor of 3.
2025-07-01 05:47:55.020 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:55.027 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:55.032 # of the computation is cached by cruncher
2025-07-01 05:47:55.038 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:55.043 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:55.056 cruncher.ratio() > best_ratio:
2025-07-01 05:47:55.067 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:55.075 if best_ratio < cutoff:
2025-07-01 05:47:55.083 # no non-identical "pretty close" pair
2025-07-01 05:47:55.088 if eqi is None:
2025-07-01 05:47:55.093 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:55.098 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:55.103 return
2025-07-01 05:47:55.109 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:55.115 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:55.123 else:
2025-07-01 05:47:55.135 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:55.143 eqi = None
2025-07-01 05:47:55.150
2025-07-01 05:47:55.157 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:55.162 # identical
2025-07-01 05:47:55.168
2025-07-01 05:47:55.174 # pump out diffs from before the synch point
2025-07-01 05:47:55.184 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:55.196
2025-07-01 05:47:55.206 # do intraline marking on the synch pair
2025-07-01 05:47:55.216 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:55.227 if eqi is None:
2025-07-01 05:47:55.236 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:55.244 atags = btags = ""
2025-07-01 05:47:55.251 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:55.259 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:55.265 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:55.274 if tag == 'replace':
2025-07-01 05:47:55.287 atags += '^' * la
2025-07-01 05:47:55.297 btags += '^' * lb
2025-07-01 05:47:55.305 elif tag == 'delete':
2025-07-01 05:47:55.312 atags += '-' * la
2025-07-01 05:47:55.325 elif tag == 'insert':
2025-07-01 05:47:55.337 btags += '+' * lb
2025-07-01 05:47:55.348 elif tag == 'equal':
2025-07-01 05:47:55.361 atags += ' ' * la
2025-07-01 05:47:55.372 btags += ' ' * lb
2025-07-01 05:47:55.384 else:
2025-07-01 05:47:55.397 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:55.408 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:55.417 else:
2025-07-01 05:47:55.427 # the synch pair is identical
2025-07-01 05:47:55.437 yield ' ' + aelt
2025-07-01 05:47:55.449
2025-07-01 05:47:55.457 # pump out diffs from after the synch point
2025-07-01 05:47:55.465 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:55.472
2025-07-01 05:47:55.479 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:55.487 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:55.497
2025-07-01 05:47:55.509 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:55.520 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:55.528 alo = 263, ahi = 1101
2025-07-01 05:47:55.540 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:55.550 blo = 263, bhi = 1101
2025-07-01 05:47:55.560
2025-07-01 05:47:55.571 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:55.581 g = []
2025-07-01 05:47:55.591 if alo < ahi:
2025-07-01 05:47:55.598 if blo < bhi:
2025-07-01 05:47:55.604 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:55.611 else:
2025-07-01 05:47:55.616 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:55.629 elif blo < bhi:
2025-07-01 05:47:55.639 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:55.648
2025-07-01 05:47:55.660 > yield from g
2025-07-01 05:47:55.668
2025-07-01 05:47:55.675 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:55.682 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:55.692
2025-07-01 05:47:55.702 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:55.711 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:55.718 alo = 263, ahi = 1101
2025-07-01 05:47:55.730 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:55.739 blo = 263, bhi = 1101
2025-07-01 05:47:55.751
2025-07-01 05:47:55.760 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:55.772 r"""
2025-07-01 05:47:55.781 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:55.791 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:55.804 synch point, and intraline difference marking is done on the
2025-07-01 05:47:55.814 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:55.823
2025-07-01 05:47:55.834 Example:
2025-07-01 05:47:55.845
2025-07-01 05:47:55.858 >>> d = Differ()
2025-07-01 05:47:55.869 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:55.881 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:55.893 >>> print(''.join(results), end="")
2025-07-01 05:47:55.902 - abcDefghiJkl
2025-07-01 05:47:55.917 + abcdefGhijkl
2025-07-01 05:47:55.929 """
2025-07-01 05:47:55.934
2025-07-01 05:47:55.938 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:55.942 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:55.948 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:55.954 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:55.960 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:55.965
2025-07-01 05:47:55.971 # search for the pair that matches best without being identical
2025-07-01 05:47:55.979 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:55.990 # on junk -- unless we have to)
2025-07-01 05:47:56.002 for j in range(blo, bhi):
2025-07-01 05:47:56.011 bj = b[j]
2025-07-01 05:47:56.019 cruncher.set_seq2(bj)
2025-07-01 05:47:56.026 for i in range(alo, ahi):
2025-07-01 05:47:56.033 ai = a[i]
2025-07-01 05:47:56.039 if ai == bj:
2025-07-01 05:47:56.044 if eqi is None:
2025-07-01 05:47:56.050 eqi, eqj = i, j
2025-07-01 05:47:56.061 continue
2025-07-01 05:47:56.069 cruncher.set_seq1(ai)
2025-07-01 05:47:56.077 # computing similarity is expensive, so use the quick
2025-07-01 05:47:56.091 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:56.104 # compares by a factor of 3.
2025-07-01 05:47:56.114 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:56.125 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:56.134 # of the computation is cached by cruncher
2025-07-01 05:47:56.143 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:56.151 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:56.158 cruncher.ratio() > best_ratio:
2025-07-01 05:47:56.163 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:56.168 if best_ratio < cutoff:
2025-07-01 05:47:56.176 # no non-identical "pretty close" pair
2025-07-01 05:47:56.186 if eqi is None:
2025-07-01 05:47:56.195 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:56.201 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:56.208 return
2025-07-01 05:47:56.213 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:56.219 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:56.224 else:
2025-07-01 05:47:56.230 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:56.236 eqi = None
2025-07-01 05:47:56.242
2025-07-01 05:47:56.248 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:56.253 # identical
2025-07-01 05:47:56.258
2025-07-01 05:47:56.262 # pump out diffs from before the synch point
2025-07-01 05:47:56.267 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:56.272
2025-07-01 05:47:56.276 # do intraline marking on the synch pair
2025-07-01 05:47:56.281 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:56.286 if eqi is None:
2025-07-01 05:47:56.291 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:56.297 atags = btags = ""
2025-07-01 05:47:56.303 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:56.309 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:56.316 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:56.323 if tag == 'replace':
2025-07-01 05:47:56.329 atags += '^' * la
2025-07-01 05:47:56.334 btags += '^' * lb
2025-07-01 05:47:56.339 elif tag == 'delete':
2025-07-01 05:47:56.351 atags += '-' * la
2025-07-01 05:47:56.361 elif tag == 'insert':
2025-07-01 05:47:56.369 btags += '+' * lb
2025-07-01 05:47:56.377 elif tag == 'equal':
2025-07-01 05:47:56.384 atags += ' ' * la
2025-07-01 05:47:56.390 btags += ' ' * lb
2025-07-01 05:47:56.395 else:
2025-07-01 05:47:56.401 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:56.406 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:56.411 else:
2025-07-01 05:47:56.416 # the synch pair is identical
2025-07-01 05:47:56.422 yield ' ' + aelt
2025-07-01 05:47:56.427
2025-07-01 05:47:56.434 # pump out diffs from after the synch point
2025-07-01 05:47:56.442 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:56.452
2025-07-01 05:47:56.464 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:56.476 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:56.487
2025-07-01 05:47:56.497 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:56.509 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:56.521 alo = 264, ahi = 1101
2025-07-01 05:47:56.531 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:56.539 blo = 264, bhi = 1101
2025-07-01 05:47:56.545
2025-07-01 05:47:56.550 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:56.555 g = []
2025-07-01 05:47:56.561 if alo < ahi:
2025-07-01 05:47:56.573 if blo < bhi:
2025-07-01 05:47:56.582 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:56.591 else:
2025-07-01 05:47:56.600 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:56.608 elif blo < bhi:
2025-07-01 05:47:56.617 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:56.624
2025-07-01 05:47:56.632 > yield from g
2025-07-01 05:47:56.636
2025-07-01 05:47:56.642 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:56.650 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:56.661
2025-07-01 05:47:56.672 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:56.682 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:56.688 alo = 264, ahi = 1101
2025-07-01 05:47:56.701 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:56.710 blo = 264, bhi = 1101
2025-07-01 05:47:56.718
2025-07-01 05:47:56.729 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:56.738 r"""
2025-07-01 05:47:56.750 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:56.761 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:56.773 synch point, and intraline difference marking is done on the
2025-07-01 05:47:56.784 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:56.795
2025-07-01 05:47:56.803 Example:
2025-07-01 05:47:56.810
2025-07-01 05:47:56.818 >>> d = Differ()
2025-07-01 05:47:56.828 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:56.837 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:56.844 >>> print(''.join(results), end="")
2025-07-01 05:47:56.850 - abcDefghiJkl
2025-07-01 05:47:56.862 + abcdefGhijkl
2025-07-01 05:47:56.875 """
2025-07-01 05:47:56.887
2025-07-01 05:47:56.895 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:56.904 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:56.910 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:56.916 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:56.922 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:56.934
2025-07-01 05:47:56.944 # search for the pair that matches best without being identical
2025-07-01 05:47:56.953 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:56.959 # on junk -- unless we have to)
2025-07-01 05:47:56.966 for j in range(blo, bhi):
2025-07-01 05:47:56.972 bj = b[j]
2025-07-01 05:47:56.978 cruncher.set_seq2(bj)
2025-07-01 05:47:56.984 for i in range(alo, ahi):
2025-07-01 05:47:56.990 ai = a[i]
2025-07-01 05:47:56.997 if ai == bj:
2025-07-01 05:47:57.004 if eqi is None:
2025-07-01 05:47:57.011 eqi, eqj = i, j
2025-07-01 05:47:57.018 continue
2025-07-01 05:47:57.025 cruncher.set_seq1(ai)
2025-07-01 05:47:57.032 # computing similarity is expensive, so use the quick
2025-07-01 05:47:57.039 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:57.047 # compares by a factor of 3.
2025-07-01 05:47:57.058 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:57.065 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:57.076 # of the computation is cached by cruncher
2025-07-01 05:47:57.085 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:57.092 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:57.098 cruncher.ratio() > best_ratio:
2025-07-01 05:47:57.102 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:57.107 if best_ratio < cutoff:
2025-07-01 05:47:57.112 # no non-identical "pretty close" pair
2025-07-01 05:47:57.117 if eqi is None:
2025-07-01 05:47:57.121 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:57.127 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:57.132 return
2025-07-01 05:47:57.138 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:57.144 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:57.149 else:
2025-07-01 05:47:57.156 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:57.165 eqi = None
2025-07-01 05:47:57.171
2025-07-01 05:47:57.177 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:57.189 # identical
2025-07-01 05:47:57.197
2025-07-01 05:47:57.205 # pump out diffs from before the synch point
2025-07-01 05:47:57.211 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:57.216
2025-07-01 05:47:57.222 # do intraline marking on the synch pair
2025-07-01 05:47:57.228 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:57.234 if eqi is None:
2025-07-01 05:47:57.242 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:57.250 atags = btags = ""
2025-07-01 05:47:57.256 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:57.262 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:57.267 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:57.272 if tag == 'replace':
2025-07-01 05:47:57.276 atags += '^' * la
2025-07-01 05:47:57.281 btags += '^' * lb
2025-07-01 05:47:57.285 elif tag == 'delete':
2025-07-01 05:47:57.291 atags += '-' * la
2025-07-01 05:47:57.299 elif tag == 'insert':
2025-07-01 05:47:57.311 btags += '+' * lb
2025-07-01 05:47:57.319 elif tag == 'equal':
2025-07-01 05:47:57.326 atags += ' ' * la
2025-07-01 05:47:57.333 btags += ' ' * lb
2025-07-01 05:47:57.338 else:
2025-07-01 05:47:57.343 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:57.348 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:57.352 else:
2025-07-01 05:47:57.357 # the synch pair is identical
2025-07-01 05:47:57.363 yield ' ' + aelt
2025-07-01 05:47:57.369
2025-07-01 05:47:57.375 # pump out diffs from after the synch point
2025-07-01 05:47:57.381 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:57.388
2025-07-01 05:47:57.399 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:57.406 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:57.412
2025-07-01 05:47:57.419 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:57.425 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:57.432 alo = 265, ahi = 1101
2025-07-01 05:47:57.443 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:57.451 blo = 265, bhi = 1101
2025-07-01 05:47:57.457
2025-07-01 05:47:57.464 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:57.470 g = []
2025-07-01 05:47:57.476 if alo < ahi:
2025-07-01 05:47:57.482 if blo < bhi:
2025-07-01 05:47:57.488 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:57.494 else:
2025-07-01 05:47:57.500 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:57.506 elif blo < bhi:
2025-07-01 05:47:57.512 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:57.519
2025-07-01 05:47:57.532 > yield from g
2025-07-01 05:47:57.541
2025-07-01 05:47:57.549 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:57.561 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:57.569
2025-07-01 05:47:57.576 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:57.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:57.588 alo = 265, ahi = 1101
2025-07-01 05:47:57.598 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:57.609 blo = 265, bhi = 1101
2025-07-01 05:47:57.619
2025-07-01 05:47:57.626 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:57.633 r"""
2025-07-01 05:47:57.639 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:57.645 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:57.652 synch point, and intraline difference marking is done on the
2025-07-01 05:47:57.659 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:57.666
2025-07-01 05:47:57.673 Example:
2025-07-01 05:47:57.680
2025-07-01 05:47:57.687 >>> d = Differ()
2025-07-01 05:47:57.694 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:57.703 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:57.715 >>> print(''.join(results), end="")
2025-07-01 05:47:57.724 - abcDefghiJkl
2025-07-01 05:47:57.739 + abcdefGhijkl
2025-07-01 05:47:57.752 """
2025-07-01 05:47:57.758
2025-07-01 05:47:57.764 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:57.770 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:57.776 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:57.782 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:57.789 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:57.796
2025-07-01 05:47:57.803 # search for the pair that matches best without being identical
2025-07-01 05:47:57.810 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:57.817 # on junk -- unless we have to)
2025-07-01 05:47:57.824 for j in range(blo, bhi):
2025-07-01 05:47:57.834 bj = b[j]
2025-07-01 05:47:57.845 cruncher.set_seq2(bj)
2025-07-01 05:47:57.854 for i in range(alo, ahi):
2025-07-01 05:47:57.861 ai = a[i]
2025-07-01 05:47:57.866 if ai == bj:
2025-07-01 05:47:57.872 if eqi is None:
2025-07-01 05:47:57.876 eqi, eqj = i, j
2025-07-01 05:47:57.881 continue
2025-07-01 05:47:57.887 cruncher.set_seq1(ai)
2025-07-01 05:47:57.893 # computing similarity is expensive, so use the quick
2025-07-01 05:47:57.899 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:57.906 # compares by a factor of 3.
2025-07-01 05:47:57.914 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:57.921 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:57.928 # of the computation is cached by cruncher
2025-07-01 05:47:57.935 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:57.943 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:57.950 cruncher.ratio() > best_ratio:
2025-07-01 05:47:57.957 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:57.964 if best_ratio < cutoff:
2025-07-01 05:47:57.971 # no non-identical "pretty close" pair
2025-07-01 05:47:57.977 if eqi is None:
2025-07-01 05:47:57.982 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:57.987 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:57.992 return
2025-07-01 05:47:57.998 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:58.005 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:58.011 else:
2025-07-01 05:47:58.020 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:58.030 eqi = None
2025-07-01 05:47:58.036
2025-07-01 05:47:58.043 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:58.050 # identical
2025-07-01 05:47:58.056
2025-07-01 05:47:58.062 # pump out diffs from before the synch point
2025-07-01 05:47:58.070 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:58.079
2025-07-01 05:47:58.087 # do intraline marking on the synch pair
2025-07-01 05:47:58.093 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:58.099 if eqi is None:
2025-07-01 05:47:58.106 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:58.116 atags = btags = ""
2025-07-01 05:47:58.125 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:58.133 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:58.139 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:58.145 if tag == 'replace':
2025-07-01 05:47:58.150 atags += '^' * la
2025-07-01 05:47:58.156 btags += '^' * lb
2025-07-01 05:47:58.163 elif tag == 'delete':
2025-07-01 05:47:58.168 atags += '-' * la
2025-07-01 05:47:58.173 elif tag == 'insert':
2025-07-01 05:47:58.179 btags += '+' * lb
2025-07-01 05:47:58.186 elif tag == 'equal':
2025-07-01 05:47:58.192 atags += ' ' * la
2025-07-01 05:47:58.199 btags += ' ' * lb
2025-07-01 05:47:58.207 else:
2025-07-01 05:47:58.220 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:58.229 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:58.236 else:
2025-07-01 05:47:58.243 # the synch pair is identical
2025-07-01 05:47:58.251 yield ' ' + aelt
2025-07-01 05:47:58.262
2025-07-01 05:47:58.270 # pump out diffs from after the synch point
2025-07-01 05:47:58.276 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:58.283
2025-07-01 05:47:58.290 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:58.302 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:58.312
2025-07-01 05:47:58.320 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:58.327 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:58.333 alo = 268, ahi = 1101
2025-07-01 05:47:58.339 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:58.345 blo = 268, bhi = 1101
2025-07-01 05:47:58.351
2025-07-01 05:47:58.358 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:58.364 g = []
2025-07-01 05:47:58.371 if alo < ahi:
2025-07-01 05:47:58.377 if blo < bhi:
2025-07-01 05:47:58.383 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:58.391 else:
2025-07-01 05:47:58.397 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:58.402 elif blo < bhi:
2025-07-01 05:47:58.409 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:58.414
2025-07-01 05:47:58.421 > yield from g
2025-07-01 05:47:58.428
2025-07-01 05:47:58.435 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:58.442 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:58.449
2025-07-01 05:47:58.456 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:58.462 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:58.468 alo = 268, ahi = 1101
2025-07-01 05:47:58.474 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:58.479 blo = 268, bhi = 1101
2025-07-01 05:47:58.485
2025-07-01 05:47:58.492 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:58.499 r"""
2025-07-01 05:47:58.505 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:58.513 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:58.520 synch point, and intraline difference marking is done on the
2025-07-01 05:47:58.527 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:58.533
2025-07-01 05:47:58.540 Example:
2025-07-01 05:47:58.546
2025-07-01 05:47:58.553 >>> d = Differ()
2025-07-01 05:47:58.560 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:58.566 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:58.575 >>> print(''.join(results), end="")
2025-07-01 05:47:58.586 - abcDefghiJkl
2025-07-01 05:47:58.599 + abcdefGhijkl
2025-07-01 05:47:58.610 """
2025-07-01 05:47:58.615
2025-07-01 05:47:58.620 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:58.625 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:58.630 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:58.635 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:58.641 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:58.646
2025-07-01 05:47:58.653 # search for the pair that matches best without being identical
2025-07-01 05:47:58.659 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:58.667 # on junk -- unless we have to)
2025-07-01 05:47:58.678 for j in range(blo, bhi):
2025-07-01 05:47:58.688 bj = b[j]
2025-07-01 05:47:58.696 cruncher.set_seq2(bj)
2025-07-01 05:47:58.702 for i in range(alo, ahi):
2025-07-01 05:47:58.708 ai = a[i]
2025-07-01 05:47:58.712 if ai == bj:
2025-07-01 05:47:58.717 if eqi is None:
2025-07-01 05:47:58.724 eqi, eqj = i, j
2025-07-01 05:47:58.730 continue
2025-07-01 05:47:58.736 cruncher.set_seq1(ai)
2025-07-01 05:47:58.742 # computing similarity is expensive, so use the quick
2025-07-01 05:47:58.751 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:58.759 # compares by a factor of 3.
2025-07-01 05:47:58.767 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:58.772 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:58.777 # of the computation is cached by cruncher
2025-07-01 05:47:58.782 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:58.787 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:58.791 cruncher.ratio() > best_ratio:
2025-07-01 05:47:58.796 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:58.800 if best_ratio < cutoff:
2025-07-01 05:47:58.806 # no non-identical "pretty close" pair
2025-07-01 05:47:58.811 if eqi is None:
2025-07-01 05:47:58.817 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:58.822 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:58.829 return
2025-07-01 05:47:58.836 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:58.843 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:58.848 else:
2025-07-01 05:47:58.853 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:58.859 eqi = None
2025-07-01 05:47:58.864
2025-07-01 05:47:58.870 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:58.876 # identical
2025-07-01 05:47:58.882
2025-07-01 05:47:58.888 # pump out diffs from before the synch point
2025-07-01 05:47:58.895 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:58.902
2025-07-01 05:47:58.909 # do intraline marking on the synch pair
2025-07-01 05:47:58.915 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:58.922 if eqi is None:
2025-07-01 05:47:58.929 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:58.936 atags = btags = ""
2025-07-01 05:47:58.943 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:58.951 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:58.963 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:58.974 if tag == 'replace':
2025-07-01 05:47:58.982 atags += '^' * la
2025-07-01 05:47:58.990 btags += '^' * lb
2025-07-01 05:47:58.997 elif tag == 'delete':
2025-07-01 05:47:59.003 atags += '-' * la
2025-07-01 05:47:59.010 elif tag == 'insert':
2025-07-01 05:47:59.016 btags += '+' * lb
2025-07-01 05:47:59.022 elif tag == 'equal':
2025-07-01 05:47:59.027 atags += ' ' * la
2025-07-01 05:47:59.033 btags += ' ' * lb
2025-07-01 05:47:59.042 else:
2025-07-01 05:47:59.050 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:47:59.056 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:47:59.061 else:
2025-07-01 05:47:59.067 # the synch pair is identical
2025-07-01 05:47:59.074 yield ' ' + aelt
2025-07-01 05:47:59.081
2025-07-01 05:47:59.087 # pump out diffs from after the synch point
2025-07-01 05:47:59.095 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:47:59.101
2025-07-01 05:47:59.108 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:47:59.114 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:59.122
2025-07-01 05:47:59.132 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:59.142 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:59.150 alo = 269, ahi = 1101
2025-07-01 05:47:59.158 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:59.169 blo = 269, bhi = 1101
2025-07-01 05:47:59.180
2025-07-01 05:47:59.187 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:59.194 g = []
2025-07-01 05:47:59.200 if alo < ahi:
2025-07-01 05:47:59.207 if blo < bhi:
2025-07-01 05:47:59.215 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:59.230 else:
2025-07-01 05:47:59.240 g = self._dump('-', a, alo, ahi)
2025-07-01 05:47:59.248 elif blo < bhi:
2025-07-01 05:47:59.255 g = self._dump('+', b, blo, bhi)
2025-07-01 05:47:59.260
2025-07-01 05:47:59.270 > yield from g
2025-07-01 05:47:59.282
2025-07-01 05:47:59.293 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:47:59.303 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:47:59.311
2025-07-01 05:47:59.319 self = <difflib.Differ object at [hex]>
2025-07-01 05:47:59.327 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:47:59.335 alo = 269, ahi = 1101
2025-07-01 05:47:59.344 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:47:59.350 blo = 269, bhi = 1101
2025-07-01 05:47:59.357
2025-07-01 05:47:59.363 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:47:59.370 r"""
2025-07-01 05:47:59.381 When replacing one block of lines with another, search the blocks
2025-07-01 05:47:59.391 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:47:59.399 synch point, and intraline difference marking is done on the
2025-07-01 05:47:59.406 similar pair. Lots of work, but often worth it.
2025-07-01 05:47:59.415
2025-07-01 05:47:59.421 Example:
2025-07-01 05:47:59.427
2025-07-01 05:47:59.433 >>> d = Differ()
2025-07-01 05:47:59.440 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:47:59.445 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:47:59.450 >>> print(''.join(results), end="")
2025-07-01 05:47:59.455 - abcDefghiJkl
2025-07-01 05:47:59.467 + abcdefGhijkl
2025-07-01 05:47:59.481 """
2025-07-01 05:47:59.487
2025-07-01 05:47:59.494 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:47:59.503 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:47:59.516 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:47:59.528 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:47:59.539 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:47:59.547
2025-07-01 05:47:59.553 # search for the pair that matches best without being identical
2025-07-01 05:47:59.559 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:47:59.565 # on junk -- unless we have to)
2025-07-01 05:47:59.570 for j in range(blo, bhi):
2025-07-01 05:47:59.578 bj = b[j]
2025-07-01 05:47:59.586 cruncher.set_seq2(bj)
2025-07-01 05:47:59.592 for i in range(alo, ahi):
2025-07-01 05:47:59.597 ai = a[i]
2025-07-01 05:47:59.602 if ai == bj:
2025-07-01 05:47:59.606 if eqi is None:
2025-07-01 05:47:59.611 eqi, eqj = i, j
2025-07-01 05:47:59.615 continue
2025-07-01 05:47:59.620 cruncher.set_seq1(ai)
2025-07-01 05:47:59.625 # computing similarity is expensive, so use the quick
2025-07-01 05:47:59.629 # upper bounds first -- have seen this speed up messy
2025-07-01 05:47:59.633 # compares by a factor of 3.
2025-07-01 05:47:59.638 # note that ratio() is only expensive to compute the first
2025-07-01 05:47:59.642 # time it's called on a sequence pair; the expensive part
2025-07-01 05:47:59.647 # of the computation is cached by cruncher
2025-07-01 05:47:59.651 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:47:59.656 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:47:59.667 cruncher.ratio() > best_ratio:
2025-07-01 05:47:59.674 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:47:59.679 if best_ratio < cutoff:
2025-07-01 05:47:59.687 # no non-identical "pretty close" pair
2025-07-01 05:47:59.698 if eqi is None:
2025-07-01 05:47:59.709 # no identical pair either -- treat it as a straight replace
2025-07-01 05:47:59.721 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:47:59.730 return
2025-07-01 05:47:59.738 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:47:59.745 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:47:59.751 else:
2025-07-01 05:47:59.758 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:47:59.764 eqi = None
2025-07-01 05:47:59.776
2025-07-01 05:47:59.784 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:47:59.791 # identical
2025-07-01 05:47:59.801
2025-07-01 05:47:59.811 # pump out diffs from before the synch point
2025-07-01 05:47:59.817 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:47:59.823
2025-07-01 05:47:59.834 # do intraline marking on the synch pair
2025-07-01 05:47:59.845 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:47:59.853 if eqi is None:
2025-07-01 05:47:59.861 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:47:59.867 atags = btags = ""
2025-07-01 05:47:59.874 cruncher.set_seqs(aelt, belt)
2025-07-01 05:47:59.885 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:47:59.894 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:47:59.901 if tag == 'replace':
2025-07-01 05:47:59.907 atags += '^' * la
2025-07-01 05:47:59.914 btags += '^' * lb
2025-07-01 05:47:59.925 elif tag == 'delete':
2025-07-01 05:47:59.936 atags += '-' * la
2025-07-01 05:47:59.948 elif tag == 'insert':
2025-07-01 05:47:59.960 btags += '+' * lb
2025-07-01 05:47:59.971 elif tag == 'equal':
2025-07-01 05:47:59.982 atags += ' ' * la
2025-07-01 05:47:59.991 btags += ' ' * lb
2025-07-01 05:47:59.999 else:
2025-07-01 05:48:00.007 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:00.016 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:00.022 else:
2025-07-01 05:48:00.029 # the synch pair is identical
2025-07-01 05:48:00.036 yield ' ' + aelt
2025-07-01 05:48:00.043
2025-07-01 05:48:00.051 # pump out diffs from after the synch point
2025-07-01 05:48:00.060 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:00.072
2025-07-01 05:48:00.080 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:00.087 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:00.093
2025-07-01 05:48:00.099 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:00.106 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:00.113 alo = 270, ahi = 1101
2025-07-01 05:48:00.120 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:00.126 blo = 270, bhi = 1101
2025-07-01 05:48:00.132
2025-07-01 05:48:00.138 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:00.146 g = []
2025-07-01 05:48:00.153 if alo < ahi:
2025-07-01 05:48:00.160 if blo < bhi:
2025-07-01 05:48:00.167 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:00.173 else:
2025-07-01 05:48:00.183 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:00.195 elif blo < bhi:
2025-07-01 05:48:00.204 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:00.222
2025-07-01 05:48:00.230 > yield from g
2025-07-01 05:48:00.238
2025-07-01 05:48:00.244 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:00.251 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:00.258
2025-07-01 05:48:00.265 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:00.271 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:00.276 alo = 270, ahi = 1101
2025-07-01 05:48:00.281 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:00.289 blo = 270, bhi = 1101
2025-07-01 05:48:00.295
2025-07-01 05:48:00.301 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:00.312 r"""
2025-07-01 05:48:00.320 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:00.327 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:00.333 synch point, and intraline difference marking is done on the
2025-07-01 05:48:00.339 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:00.344
2025-07-01 05:48:00.349 Example:
2025-07-01 05:48:00.361
2025-07-01 05:48:00.371 >>> d = Differ()
2025-07-01 05:48:00.383 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:00.390 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:00.403 >>> print(''.join(results), end="")
2025-07-01 05:48:00.411 - abcDefghiJkl
2025-07-01 05:48:00.423 + abcdefGhijkl
2025-07-01 05:48:00.432 """
2025-07-01 05:48:00.437
2025-07-01 05:48:00.443 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:00.448 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:00.454 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:00.460 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:00.469 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:00.477
2025-07-01 05:48:00.484 # search for the pair that matches best without being identical
2025-07-01 05:48:00.489 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:00.495 # on junk -- unless we have to)
2025-07-01 05:48:00.507 for j in range(blo, bhi):
2025-07-01 05:48:00.516 bj = b[j]
2025-07-01 05:48:00.524 cruncher.set_seq2(bj)
2025-07-01 05:48:00.531 for i in range(alo, ahi):
2025-07-01 05:48:00.539 ai = a[i]
2025-07-01 05:48:00.547 if ai == bj:
2025-07-01 05:48:00.553 if eqi is None:
2025-07-01 05:48:00.560 eqi, eqj = i, j
2025-07-01 05:48:00.566 continue
2025-07-01 05:48:00.573 cruncher.set_seq1(ai)
2025-07-01 05:48:00.579 # computing similarity is expensive, so use the quick
2025-07-01 05:48:00.585 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:00.591 # compares by a factor of 3.
2025-07-01 05:48:00.599 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:00.607 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:00.614 # of the computation is cached by cruncher
2025-07-01 05:48:00.627 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:00.637 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:00.649 cruncher.ratio() > best_ratio:
2025-07-01 05:48:00.660 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:00.668 if best_ratio < cutoff:
2025-07-01 05:48:00.675 # no non-identical "pretty close" pair
2025-07-01 05:48:00.682 if eqi is None:
2025-07-01 05:48:00.688 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:00.694 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:00.703 return
2025-07-01 05:48:00.712 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:00.720 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:00.726 else:
2025-07-01 05:48:00.733 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:00.739 eqi = None
2025-07-01 05:48:00.745
2025-07-01 05:48:00.751 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:00.757 # identical
2025-07-01 05:48:00.762
2025-07-01 05:48:00.774 # pump out diffs from before the synch point
2025-07-01 05:48:00.787 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:00.799
2025-07-01 05:48:00.808 # do intraline marking on the synch pair
2025-07-01 05:48:00.816 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:00.823 if eqi is None:
2025-07-01 05:48:00.831 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:00.842 atags = btags = ""
2025-07-01 05:48:00.851 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:00.858 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:00.865 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:00.872 if tag == 'replace':
2025-07-01 05:48:00.879 atags += '^' * la
2025-07-01 05:48:00.888 btags += '^' * lb
2025-07-01 05:48:00.897 elif tag == 'delete':
2025-07-01 05:48:00.906 atags += '-' * la
2025-07-01 05:48:00.912 elif tag == 'insert':
2025-07-01 05:48:00.918 btags += '+' * lb
2025-07-01 05:48:00.928 elif tag == 'equal':
2025-07-01 05:48:00.937 atags += ' ' * la
2025-07-01 05:48:00.944 btags += ' ' * lb
2025-07-01 05:48:00.950 else:
2025-07-01 05:48:00.955 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:00.960 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:00.965 else:
2025-07-01 05:48:00.971 # the synch pair is identical
2025-07-01 05:48:00.977 yield ' ' + aelt
2025-07-01 05:48:00.982
2025-07-01 05:48:00.989 # pump out diffs from after the synch point
2025-07-01 05:48:00.995 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:01.001
2025-07-01 05:48:01.006 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:01.011 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:01.016
2025-07-01 05:48:01.022 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:01.028 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:01.035 alo = 271, ahi = 1101
2025-07-01 05:48:01.043 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:01.051 blo = 271, bhi = 1101
2025-07-01 05:48:01.062
2025-07-01 05:48:01.070 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:01.076 g = []
2025-07-01 05:48:01.082 if alo < ahi:
2025-07-01 05:48:01.088 if blo < bhi:
2025-07-01 05:48:01.093 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:01.099 else:
2025-07-01 05:48:01.106 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:01.113 elif blo < bhi:
2025-07-01 05:48:01.120 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:01.125
2025-07-01 05:48:01.132 > yield from g
2025-07-01 05:48:01.139
2025-07-01 05:48:01.147 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:01.158 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:01.166
2025-07-01 05:48:01.173 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:01.179 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:01.185 alo = 271, ahi = 1101
2025-07-01 05:48:01.191 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:01.198 blo = 271, bhi = 1101
2025-07-01 05:48:01.208
2025-07-01 05:48:01.217 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:01.225 r"""
2025-07-01 05:48:01.231 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:01.237 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:01.244 synch point, and intraline difference marking is done on the
2025-07-01 05:48:01.250 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:01.259
2025-07-01 05:48:01.270 Example:
2025-07-01 05:48:01.279
2025-07-01 05:48:01.286 >>> d = Differ()
2025-07-01 05:48:01.293 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:01.299 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:01.305 >>> print(''.join(results), end="")
2025-07-01 05:48:01.312 - abcDefghiJkl
2025-07-01 05:48:01.326 + abcdefGhijkl
2025-07-01 05:48:01.338 """
2025-07-01 05:48:01.343
2025-07-01 05:48:01.349 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:01.356 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:01.362 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:01.369 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:01.374 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:01.383
2025-07-01 05:48:01.393 # search for the pair that matches best without being identical
2025-07-01 05:48:01.400 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:01.407 # on junk -- unless we have to)
2025-07-01 05:48:01.412 for j in range(blo, bhi):
2025-07-01 05:48:01.417 bj = b[j]
2025-07-01 05:48:01.422 cruncher.set_seq2(bj)
2025-07-01 05:48:01.428 for i in range(alo, ahi):
2025-07-01 05:48:01.433 ai = a[i]
2025-07-01 05:48:01.440 if ai == bj:
2025-07-01 05:48:01.446 if eqi is None:
2025-07-01 05:48:01.451 eqi, eqj = i, j
2025-07-01 05:48:01.458 continue
2025-07-01 05:48:01.469 cruncher.set_seq1(ai)
2025-07-01 05:48:01.475 # computing similarity is expensive, so use the quick
2025-07-01 05:48:01.481 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:01.492 # compares by a factor of 3.
2025-07-01 05:48:01.502 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:01.509 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:01.516 # of the computation is cached by cruncher
2025-07-01 05:48:01.522 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:01.530 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:01.540 cruncher.ratio() > best_ratio:
2025-07-01 05:48:01.551 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:01.557 if best_ratio < cutoff:
2025-07-01 05:48:01.564 # no non-identical "pretty close" pair
2025-07-01 05:48:01.570 if eqi is None:
2025-07-01 05:48:01.576 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:01.582 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:01.588 return
2025-07-01 05:48:01.594 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:01.600 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:01.606 else:
2025-07-01 05:48:01.613 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:01.620 eqi = None
2025-07-01 05:48:01.628
2025-07-01 05:48:01.637 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:01.647 # identical
2025-07-01 05:48:01.659
2025-07-01 05:48:01.671 # pump out diffs from before the synch point
2025-07-01 05:48:01.684 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:01.693
2025-07-01 05:48:01.703 # do intraline marking on the synch pair
2025-07-01 05:48:01.710 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:01.721 if eqi is None:
2025-07-01 05:48:01.731 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:01.739 atags = btags = ""
2025-07-01 05:48:01.745 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:01.750 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:01.761 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:01.767 if tag == 'replace':
2025-07-01 05:48:01.773 atags += '^' * la
2025-07-01 05:48:01.780 btags += '^' * lb
2025-07-01 05:48:01.787 elif tag == 'delete':
2025-07-01 05:48:01.794 atags += '-' * la
2025-07-01 05:48:01.801 elif tag == 'insert':
2025-07-01 05:48:01.814 btags += '+' * lb
2025-07-01 05:48:01.825 elif tag == 'equal':
2025-07-01 05:48:01.834 atags += ' ' * la
2025-07-01 05:48:01.846 btags += ' ' * lb
2025-07-01 05:48:01.859 else:
2025-07-01 05:48:01.869 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:01.878 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:01.885 else:
2025-07-01 05:48:01.891 # the synch pair is identical
2025-07-01 05:48:01.897 yield ' ' + aelt
2025-07-01 05:48:01.904
2025-07-01 05:48:01.912 # pump out diffs from after the synch point
2025-07-01 05:48:01.918 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:01.923
2025-07-01 05:48:01.929 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:01.936 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:01.942
2025-07-01 05:48:01.949 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:01.955 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:01.961 alo = 272, ahi = 1101
2025-07-01 05:48:01.970 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:01.978 blo = 272, bhi = 1101
2025-07-01 05:48:01.985
2025-07-01 05:48:01.992 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:01.998 g = []
2025-07-01 05:48:02.005 if alo < ahi:
2025-07-01 05:48:02.011 if blo < bhi:
2025-07-01 05:48:02.018 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:02.025 else:
2025-07-01 05:48:02.038 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:02.044 elif blo < bhi:
2025-07-01 05:48:02.051 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:02.057
2025-07-01 05:48:02.063 > yield from g
2025-07-01 05:48:02.068
2025-07-01 05:48:02.074 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:02.079 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:02.084
2025-07-01 05:48:02.091 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:02.100 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:02.107 alo = 272, ahi = 1101
2025-07-01 05:48:02.115 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:02.122 blo = 272, bhi = 1101
2025-07-01 05:48:02.128
2025-07-01 05:48:02.136 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:02.143 r"""
2025-07-01 05:48:02.151 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:02.158 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:02.167 synch point, and intraline difference marking is done on the
2025-07-01 05:48:02.180 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:02.190
2025-07-01 05:48:02.196 Example:
2025-07-01 05:48:02.203
2025-07-01 05:48:02.209 >>> d = Differ()
2025-07-01 05:48:02.216 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:02.222 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:02.227 >>> print(''.join(results), end="")
2025-07-01 05:48:02.233 - abcDefghiJkl
2025-07-01 05:48:02.245 + abcdefGhijkl
2025-07-01 05:48:02.259 """
2025-07-01 05:48:02.266
2025-07-01 05:48:02.275 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:02.287 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:02.296 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:02.304 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:02.314 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:02.320
2025-07-01 05:48:02.326 # search for the pair that matches best without being identical
2025-07-01 05:48:02.333 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:02.339 # on junk -- unless we have to)
2025-07-01 05:48:02.345 for j in range(blo, bhi):
2025-07-01 05:48:02.356 bj = b[j]
2025-07-01 05:48:02.365 cruncher.set_seq2(bj)
2025-07-01 05:48:02.374 for i in range(alo, ahi):
2025-07-01 05:48:02.383 ai = a[i]
2025-07-01 05:48:02.394 if ai == bj:
2025-07-01 05:48:02.401 if eqi is None:
2025-07-01 05:48:02.407 eqi, eqj = i, j
2025-07-01 05:48:02.413 continue
2025-07-01 05:48:02.418 cruncher.set_seq1(ai)
2025-07-01 05:48:02.424 # computing similarity is expensive, so use the quick
2025-07-01 05:48:02.430 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:02.436 # compares by a factor of 3.
2025-07-01 05:48:02.442 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:02.451 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:02.461 # of the computation is cached by cruncher
2025-07-01 05:48:02.468 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:02.479 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:02.487 cruncher.ratio() > best_ratio:
2025-07-01 05:48:02.496 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:02.504 if best_ratio < cutoff:
2025-07-01 05:48:02.511 # no non-identical "pretty close" pair
2025-07-01 05:48:02.517 if eqi is None:
2025-07-01 05:48:02.522 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:02.528 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:02.535 return
2025-07-01 05:48:02.541 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:02.547 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:02.555 else:
2025-07-01 05:48:02.565 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:02.574 eqi = None
2025-07-01 05:48:02.581
2025-07-01 05:48:02.587 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:02.594 # identical
2025-07-01 05:48:02.604
2025-07-01 05:48:02.616 # pump out diffs from before the synch point
2025-07-01 05:48:02.628 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:02.641
2025-07-01 05:48:02.653 # do intraline marking on the synch pair
2025-07-01 05:48:02.660 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:02.669 if eqi is None:
2025-07-01 05:48:02.678 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:02.687 atags = btags = ""
2025-07-01 05:48:02.695 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:02.703 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:02.710 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:02.718 if tag == 'replace':
2025-07-01 05:48:02.727 atags += '^' * la
2025-07-01 05:48:02.734 btags += '^' * lb
2025-07-01 05:48:02.742 elif tag == 'delete':
2025-07-01 05:48:02.750 atags += '-' * la
2025-07-01 05:48:02.761 elif tag == 'insert':
2025-07-01 05:48:02.768 btags += '+' * lb
2025-07-01 05:48:02.773 elif tag == 'equal':
2025-07-01 05:48:02.779 atags += ' ' * la
2025-07-01 05:48:02.787 btags += ' ' * lb
2025-07-01 05:48:02.793 else:
2025-07-01 05:48:02.799 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:02.807 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:02.814 else:
2025-07-01 05:48:02.821 # the synch pair is identical
2025-07-01 05:48:02.827 yield ' ' + aelt
2025-07-01 05:48:02.834
2025-07-01 05:48:02.839 # pump out diffs from after the synch point
2025-07-01 05:48:02.845 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:02.854
2025-07-01 05:48:02.861 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:02.867 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:02.876
2025-07-01 05:48:02.886 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:02.899 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:02.909 alo = 273, ahi = 1101
2025-07-01 05:48:02.920 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:02.933 blo = 273, bhi = 1101
2025-07-01 05:48:02.942
2025-07-01 05:48:02.951 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:02.959 g = []
2025-07-01 05:48:02.968 if alo < ahi:
2025-07-01 05:48:02.978 if blo < bhi:
2025-07-01 05:48:02.986 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:02.995 else:
2025-07-01 05:48:03.001 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:03.006 elif blo < bhi:
2025-07-01 05:48:03.013 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:03.019
2025-07-01 05:48:03.024 > yield from g
2025-07-01 05:48:03.030
2025-07-01 05:48:03.037 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:03.045 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:03.051
2025-07-01 05:48:03.059 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:03.070 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:03.076 alo = 273, ahi = 1101
2025-07-01 05:48:03.086 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:03.092 blo = 273, bhi = 1101
2025-07-01 05:48:03.098
2025-07-01 05:48:03.108 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:03.116 r"""
2025-07-01 05:48:03.123 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:03.130 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:03.137 synch point, and intraline difference marking is done on the
2025-07-01 05:48:03.145 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:03.153
2025-07-01 05:48:03.160 Example:
2025-07-01 05:48:03.166
2025-07-01 05:48:03.178 >>> d = Differ()
2025-07-01 05:48:03.188 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:03.196 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:03.203 >>> print(''.join(results), end="")
2025-07-01 05:48:03.210 - abcDefghiJkl
2025-07-01 05:48:03.222 + abcdefGhijkl
2025-07-01 05:48:03.233 """
2025-07-01 05:48:03.239
2025-07-01 05:48:03.245 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:03.251 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:03.257 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:03.269 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:03.278 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:03.284
2025-07-01 05:48:03.290 # search for the pair that matches best without being identical
2025-07-01 05:48:03.297 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:03.303 # on junk -- unless we have to)
2025-07-01 05:48:03.309 for j in range(blo, bhi):
2025-07-01 05:48:03.316 bj = b[j]
2025-07-01 05:48:03.323 cruncher.set_seq2(bj)
2025-07-01 05:48:03.331 for i in range(alo, ahi):
2025-07-01 05:48:03.338 ai = a[i]
2025-07-01 05:48:03.350 if ai == bj:
2025-07-01 05:48:03.359 if eqi is None:
2025-07-01 05:48:03.365 eqi, eqj = i, j
2025-07-01 05:48:03.371 continue
2025-07-01 05:48:03.377 cruncher.set_seq1(ai)
2025-07-01 05:48:03.383 # computing similarity is expensive, so use the quick
2025-07-01 05:48:03.389 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:03.394 # compares by a factor of 3.
2025-07-01 05:48:03.401 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:03.407 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:03.413 # of the computation is cached by cruncher
2025-07-01 05:48:03.419 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:03.426 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:03.436 cruncher.ratio() > best_ratio:
2025-07-01 05:48:03.445 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:03.452 if best_ratio < cutoff:
2025-07-01 05:48:03.459 # no non-identical "pretty close" pair
2025-07-01 05:48:03.465 if eqi is None:
2025-07-01 05:48:03.478 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:03.488 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:03.496 return
2025-07-01 05:48:03.504 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:03.509 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:03.517 else:
2025-07-01 05:48:03.525 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:03.531 eqi = None
2025-07-01 05:48:03.536
2025-07-01 05:48:03.546 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:03.560 # identical
2025-07-01 05:48:03.570
2025-07-01 05:48:03.577 # pump out diffs from before the synch point
2025-07-01 05:48:03.585 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:03.593
2025-07-01 05:48:03.600 # do intraline marking on the synch pair
2025-07-01 05:48:03.613 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:03.621 if eqi is None:
2025-07-01 05:48:03.627 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:03.632 atags = btags = ""
2025-07-01 05:48:03.638 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:03.643 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:03.648 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:03.652 if tag == 'replace':
2025-07-01 05:48:03.657 atags += '^' * la
2025-07-01 05:48:03.661 btags += '^' * lb
2025-07-01 05:48:03.666 elif tag == 'delete':
2025-07-01 05:48:03.670 atags += '-' * la
2025-07-01 05:48:03.675 elif tag == 'insert':
2025-07-01 05:48:03.681 btags += '+' * lb
2025-07-01 05:48:03.688 elif tag == 'equal':
2025-07-01 05:48:03.693 atags += ' ' * la
2025-07-01 05:48:03.698 btags += ' ' * lb
2025-07-01 05:48:03.703 else:
2025-07-01 05:48:03.707 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:03.712 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:03.717 else:
2025-07-01 05:48:03.721 # the synch pair is identical
2025-07-01 05:48:03.730 yield ' ' + aelt
2025-07-01 05:48:03.734
2025-07-01 05:48:03.743 # pump out diffs from after the synch point
2025-07-01 05:48:03.753 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:03.761
2025-07-01 05:48:03.767 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:03.774 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:03.780
2025-07-01 05:48:03.786 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:03.794 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:03.802 alo = 274, ahi = 1101
2025-07-01 05:48:03.811 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:03.817 blo = 274, bhi = 1101
2025-07-01 05:48:03.824
2025-07-01 05:48:03.830 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:03.836 g = []
2025-07-01 05:48:03.843 if alo < ahi:
2025-07-01 05:48:03.857 if blo < bhi:
2025-07-01 05:48:03.864 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:03.870 else:
2025-07-01 05:48:03.875 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:03.881 elif blo < bhi:
2025-07-01 05:48:03.886 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:03.892
2025-07-01 05:48:03.899 > yield from g
2025-07-01 05:48:03.904
2025-07-01 05:48:03.910 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:03.916 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:03.922
2025-07-01 05:48:03.933 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:03.946 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:03.953 alo = 274, ahi = 1101
2025-07-01 05:48:03.963 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:03.972 blo = 274, bhi = 1101
2025-07-01 05:48:03.978
2025-07-01 05:48:03.985 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:03.994 r"""
2025-07-01 05:48:04.002 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:04.007 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:04.012 synch point, and intraline difference marking is done on the
2025-07-01 05:48:04.017 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:04.022
2025-07-01 05:48:04.027 Example:
2025-07-01 05:48:04.032
2025-07-01 05:48:04.038 >>> d = Differ()
2025-07-01 05:48:04.045 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:04.057 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:04.067 >>> print(''.join(results), end="")
2025-07-01 05:48:04.077 - abcDefghiJkl
2025-07-01 05:48:04.102 + abcdefGhijkl
2025-07-01 05:48:04.124 """
2025-07-01 05:48:04.132
2025-07-01 05:48:04.137 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:04.149 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:04.158 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:04.164 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:04.171 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:04.178
2025-07-01 05:48:04.186 # search for the pair that matches best without being identical
2025-07-01 05:48:04.193 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:04.207 # on junk -- unless we have to)
2025-07-01 05:48:04.218 for j in range(blo, bhi):
2025-07-01 05:48:04.225 bj = b[j]
2025-07-01 05:48:04.237 cruncher.set_seq2(bj)
2025-07-01 05:48:04.242 for i in range(alo, ahi):
2025-07-01 05:48:04.250 ai = a[i]
2025-07-01 05:48:04.262 if ai == bj:
2025-07-01 05:48:04.272 if eqi is None:
2025-07-01 05:48:04.281 eqi, eqj = i, j
2025-07-01 05:48:04.294 continue
2025-07-01 05:48:04.305 cruncher.set_seq1(ai)
2025-07-01 05:48:04.318 # computing similarity is expensive, so use the quick
2025-07-01 05:48:04.332 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:04.344 # compares by a factor of 3.
2025-07-01 05:48:04.353 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:04.366 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:04.377 # of the computation is cached by cruncher
2025-07-01 05:48:04.385 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:04.391 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:04.398 cruncher.ratio() > best_ratio:
2025-07-01 05:48:04.405 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:04.410 if best_ratio < cutoff:
2025-07-01 05:48:04.415 # no non-identical "pretty close" pair
2025-07-01 05:48:04.420 if eqi is None:
2025-07-01 05:48:04.426 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:04.432 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:04.438 return
2025-07-01 05:48:04.449 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:04.458 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:04.466 else:
2025-07-01 05:48:04.475 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:04.480 eqi = None
2025-07-01 05:48:04.485
2025-07-01 05:48:04.491 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:04.496 # identical
2025-07-01 05:48:04.502
2025-07-01 05:48:04.507 # pump out diffs from before the synch point
2025-07-01 05:48:04.514 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:04.525
2025-07-01 05:48:04.534 # do intraline marking on the synch pair
2025-07-01 05:48:04.541 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:04.547 if eqi is None:
2025-07-01 05:48:04.557 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:04.563 atags = btags = ""
2025-07-01 05:48:04.569 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:04.575 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:04.582 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:04.593 if tag == 'replace':
2025-07-01 05:48:04.604 atags += '^' * la
2025-07-01 05:48:04.615 btags += '^' * lb
2025-07-01 05:48:04.621 elif tag == 'delete':
2025-07-01 05:48:04.627 atags += '-' * la
2025-07-01 05:48:04.633 elif tag == 'insert':
2025-07-01 05:48:04.641 btags += '+' * lb
2025-07-01 05:48:04.651 elif tag == 'equal':
2025-07-01 05:48:04.659 atags += ' ' * la
2025-07-01 05:48:04.666 btags += ' ' * lb
2025-07-01 05:48:04.671 else:
2025-07-01 05:48:04.679 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:04.685 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:04.691 else:
2025-07-01 05:48:04.698 # the synch pair is identical
2025-07-01 05:48:04.704 yield ' ' + aelt
2025-07-01 05:48:04.709
2025-07-01 05:48:04.715 # pump out diffs from after the synch point
2025-07-01 05:48:04.722 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:04.728
2025-07-01 05:48:04.735 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:04.741 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:04.749
2025-07-01 05:48:04.755 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:04.761 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:04.766 alo = 275, ahi = 1101
2025-07-01 05:48:04.773 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:04.779 blo = 275, bhi = 1101
2025-07-01 05:48:04.785
2025-07-01 05:48:04.791 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:04.798 g = []
2025-07-01 05:48:04.804 if alo < ahi:
2025-07-01 05:48:04.810 if blo < bhi:
2025-07-01 05:48:04.817 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:04.823 else:
2025-07-01 05:48:04.831 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:04.838 elif blo < bhi:
2025-07-01 05:48:04.848 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:04.858
2025-07-01 05:48:04.864 > yield from g
2025-07-01 05:48:04.870
2025-07-01 05:48:04.877 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:04.886 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:04.896
2025-07-01 05:48:04.903 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:04.909 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:04.915 alo = 275, ahi = 1101
2025-07-01 05:48:04.921 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:04.926 blo = 275, bhi = 1101
2025-07-01 05:48:04.930
2025-07-01 05:48:04.935 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:04.940 r"""
2025-07-01 05:48:04.946 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:04.951 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:04.956 synch point, and intraline difference marking is done on the
2025-07-01 05:48:04.961 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:04.966
2025-07-01 05:48:04.972 Example:
2025-07-01 05:48:04.979
2025-07-01 05:48:04.990 >>> d = Differ()
2025-07-01 05:48:04.997 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:05.003 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:05.010 >>> print(''.join(results), end="")
2025-07-01 05:48:05.020 - abcDefghiJkl
2025-07-01 05:48:05.037 + abcdefGhijkl
2025-07-01 05:48:05.047 """
2025-07-01 05:48:05.054
2025-07-01 05:48:05.061 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:05.066 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:05.072 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:05.077 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:05.083 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:05.090
2025-07-01 05:48:05.101 # search for the pair that matches best without being identical
2025-07-01 05:48:05.112 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:05.123 # on junk -- unless we have to)
2025-07-01 05:48:05.132 for j in range(blo, bhi):
2025-07-01 05:48:05.139 bj = b[j]
2025-07-01 05:48:05.146 cruncher.set_seq2(bj)
2025-07-01 05:48:05.155 for i in range(alo, ahi):
2025-07-01 05:48:05.166 ai = a[i]
2025-07-01 05:48:05.175 if ai == bj:
2025-07-01 05:48:05.182 if eqi is None:
2025-07-01 05:48:05.192 eqi, eqj = i, j
2025-07-01 05:48:05.202 continue
2025-07-01 05:48:05.213 cruncher.set_seq1(ai)
2025-07-01 05:48:05.224 # computing similarity is expensive, so use the quick
2025-07-01 05:48:05.232 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:05.238 # compares by a factor of 3.
2025-07-01 05:48:05.244 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:05.252 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:05.260 # of the computation is cached by cruncher
2025-07-01 05:48:05.266 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:05.273 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:05.279 cruncher.ratio() > best_ratio:
2025-07-01 05:48:05.286 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:05.293 if best_ratio < cutoff:
2025-07-01 05:48:05.306 # no non-identical "pretty close" pair
2025-07-01 05:48:05.313 if eqi is None:
2025-07-01 05:48:05.320 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:05.327 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:05.336 return
2025-07-01 05:48:05.345 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:05.351 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:05.358 else:
2025-07-01 05:48:05.363 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:05.369 eqi = None
2025-07-01 05:48:05.375
2025-07-01 05:48:05.382 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:05.390 # identical
2025-07-01 05:48:05.396
2025-07-01 05:48:05.402 # pump out diffs from before the synch point
2025-07-01 05:48:05.407 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:05.415
2025-07-01 05:48:05.426 # do intraline marking on the synch pair
2025-07-01 05:48:05.437 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:05.448 if eqi is None:
2025-07-01 05:48:05.458 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:05.464 atags = btags = ""
2025-07-01 05:48:05.470 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:05.476 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:05.481 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:05.487 if tag == 'replace':
2025-07-01 05:48:05.492 atags += '^' * la
2025-07-01 05:48:05.498 btags += '^' * lb
2025-07-01 05:48:05.504 elif tag == 'delete':
2025-07-01 05:48:05.510 atags += '-' * la
2025-07-01 05:48:05.516 elif tag == 'insert':
2025-07-01 05:48:05.521 btags += '+' * lb
2025-07-01 05:48:05.526 elif tag == 'equal':
2025-07-01 05:48:05.531 atags += ' ' * la
2025-07-01 05:48:05.537 btags += ' ' * lb
2025-07-01 05:48:05.543 else:
2025-07-01 05:48:05.548 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:05.552 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:05.557 else:
2025-07-01 05:48:05.564 # the synch pair is identical
2025-07-01 05:48:05.571 yield ' ' + aelt
2025-07-01 05:48:05.578
2025-07-01 05:48:05.590 # pump out diffs from after the synch point
2025-07-01 05:48:05.600 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:05.608
2025-07-01 05:48:05.615 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:05.623 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:05.634
2025-07-01 05:48:05.643 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:05.650 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:05.655 alo = 276, ahi = 1101
2025-07-01 05:48:05.661 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:05.666 blo = 276, bhi = 1101
2025-07-01 05:48:05.674
2025-07-01 05:48:05.681 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:05.687 g = []
2025-07-01 05:48:05.694 if alo < ahi:
2025-07-01 05:48:05.701 if blo < bhi:
2025-07-01 05:48:05.707 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:05.714 else:
2025-07-01 05:48:05.722 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:05.734 elif blo < bhi:
2025-07-01 05:48:05.744 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:05.755
2025-07-01 05:48:05.766 > yield from g
2025-07-01 05:48:05.776
2025-07-01 05:48:05.785 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:05.795 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:05.807
2025-07-01 05:48:05.819 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:05.831 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:05.839 alo = 276, ahi = 1101
2025-07-01 05:48:05.846 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:05.852 blo = 276, bhi = 1101
2025-07-01 05:48:05.859
2025-07-01 05:48:05.865 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:05.870 r"""
2025-07-01 05:48:05.875 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:05.881 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:05.893 synch point, and intraline difference marking is done on the
2025-07-01 05:48:05.902 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:05.910
2025-07-01 05:48:05.916 Example:
2025-07-01 05:48:05.922
2025-07-01 05:48:05.927 >>> d = Differ()
2025-07-01 05:48:05.933 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:05.940 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:05.946 >>> print(''.join(results), end="")
2025-07-01 05:48:05.952 - abcDefghiJkl
2025-07-01 05:48:05.963 + abcdefGhijkl
2025-07-01 05:48:05.982 """
2025-07-01 05:48:05.990
2025-07-01 05:48:05.998 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:06.009 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:06.017 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:06.024 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:06.030 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:06.036
2025-07-01 05:48:06.047 # search for the pair that matches best without being identical
2025-07-01 05:48:06.059 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:06.073 # on junk -- unless we have to)
2025-07-01 05:48:06.083 for j in range(blo, bhi):
2025-07-01 05:48:06.091 bj = b[j]
2025-07-01 05:48:06.098 cruncher.set_seq2(bj)
2025-07-01 05:48:06.104 for i in range(alo, ahi):
2025-07-01 05:48:06.110 ai = a[i]
2025-07-01 05:48:06.115 if ai == bj:
2025-07-01 05:48:06.120 if eqi is None:
2025-07-01 05:48:06.125 eqi, eqj = i, j
2025-07-01 05:48:06.130 continue
2025-07-01 05:48:06.138 cruncher.set_seq1(ai)
2025-07-01 05:48:06.145 # computing similarity is expensive, so use the quick
2025-07-01 05:48:06.153 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:06.158 # compares by a factor of 3.
2025-07-01 05:48:06.167 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:06.175 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:06.182 # of the computation is cached by cruncher
2025-07-01 05:48:06.189 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:06.195 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:06.204 cruncher.ratio() > best_ratio:
2025-07-01 05:48:06.210 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:06.216 if best_ratio < cutoff:
2025-07-01 05:48:06.222 # no non-identical "pretty close" pair
2025-07-01 05:48:06.228 if eqi is None:
2025-07-01 05:48:06.234 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:06.240 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:06.245 return
2025-07-01 05:48:06.251 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:06.258 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:06.263 else:
2025-07-01 05:48:06.270 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:06.276 eqi = None
2025-07-01 05:48:06.283
2025-07-01 05:48:06.290 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:06.297 # identical
2025-07-01 05:48:06.303
2025-07-01 05:48:06.310 # pump out diffs from before the synch point
2025-07-01 05:48:06.317 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:06.324
2025-07-01 05:48:06.332 # do intraline marking on the synch pair
2025-07-01 05:48:06.339 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:06.345 if eqi is None:
2025-07-01 05:48:06.352 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:06.358 atags = btags = ""
2025-07-01 05:48:06.364 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:06.370 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:06.376 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:06.382 if tag == 'replace':
2025-07-01 05:48:06.388 atags += '^' * la
2025-07-01 05:48:06.394 btags += '^' * lb
2025-07-01 05:48:06.400 elif tag == 'delete':
2025-07-01 05:48:06.406 atags += '-' * la
2025-07-01 05:48:06.415 elif tag == 'insert':
2025-07-01 05:48:06.425 btags += '+' * lb
2025-07-01 05:48:06.433 elif tag == 'equal':
2025-07-01 05:48:06.440 atags += ' ' * la
2025-07-01 05:48:06.446 btags += ' ' * lb
2025-07-01 05:48:06.451 else:
2025-07-01 05:48:06.458 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:06.465 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:06.472 else:
2025-07-01 05:48:06.479 # the synch pair is identical
2025-07-01 05:48:06.486 yield ' ' + aelt
2025-07-01 05:48:06.492
2025-07-01 05:48:06.499 # pump out diffs from after the synch point
2025-07-01 05:48:06.506 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:06.513
2025-07-01 05:48:06.519 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:06.527 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:06.533
2025-07-01 05:48:06.545 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:06.559 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:06.569 alo = 277, ahi = 1101
2025-07-01 05:48:06.578 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:06.585 blo = 277, bhi = 1101
2025-07-01 05:48:06.591
2025-07-01 05:48:06.598 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:06.607 g = []
2025-07-01 05:48:06.614 if alo < ahi:
2025-07-01 05:48:06.619 if blo < bhi:
2025-07-01 05:48:06.625 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:06.630 else:
2025-07-01 05:48:06.635 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:06.639 elif blo < bhi:
2025-07-01 05:48:06.644 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:06.648
2025-07-01 05:48:06.653 > yield from g
2025-07-01 05:48:06.657
2025-07-01 05:48:06.664 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:06.671 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:06.677
2025-07-01 05:48:06.684 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:06.696 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:06.703 alo = 277, ahi = 1101
2025-07-01 05:48:06.711 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:06.717 blo = 277, bhi = 1101
2025-07-01 05:48:06.724
2025-07-01 05:48:06.731 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:06.738 r"""
2025-07-01 05:48:06.750 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:06.759 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:06.767 synch point, and intraline difference marking is done on the
2025-07-01 05:48:06.774 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:06.779
2025-07-01 05:48:06.786 Example:
2025-07-01 05:48:06.795
2025-07-01 05:48:06.802 >>> d = Differ()
2025-07-01 05:48:06.808 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:06.815 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:06.821 >>> print(''.join(results), end="")
2025-07-01 05:48:06.826 - abcDefghiJkl
2025-07-01 05:48:06.850 + abcdefGhijkl
2025-07-01 05:48:06.873 """
2025-07-01 05:48:06.883
2025-07-01 05:48:06.894 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:06.904 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:06.912 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:06.919 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:06.926 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:06.931
2025-07-01 05:48:06.937 # search for the pair that matches best without being identical
2025-07-01 05:48:06.943 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:06.949 # on junk -- unless we have to)
2025-07-01 05:48:06.955 for j in range(blo, bhi):
2025-07-01 05:48:06.963 bj = b[j]
2025-07-01 05:48:06.970 cruncher.set_seq2(bj)
2025-07-01 05:48:06.976 for i in range(alo, ahi):
2025-07-01 05:48:06.983 ai = a[i]
2025-07-01 05:48:06.990 if ai == bj:
2025-07-01 05:48:06.996 if eqi is None:
2025-07-01 05:48:07.006 eqi, eqj = i, j
2025-07-01 05:48:07.016 continue
2025-07-01 05:48:07.024 cruncher.set_seq1(ai)
2025-07-01 05:48:07.031 # computing similarity is expensive, so use the quick
2025-07-01 05:48:07.038 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:07.044 # compares by a factor of 3.
2025-07-01 05:48:07.050 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:07.056 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:07.062 # of the computation is cached by cruncher
2025-07-01 05:48:07.068 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:07.074 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:07.080 cruncher.ratio() > best_ratio:
2025-07-01 05:48:07.087 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:07.097 if best_ratio < cutoff:
2025-07-01 05:48:07.105 # no non-identical "pretty close" pair
2025-07-01 05:48:07.111 if eqi is None:
2025-07-01 05:48:07.118 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:07.129 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:07.139 return
2025-07-01 05:48:07.147 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:07.155 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:07.166 else:
2025-07-01 05:48:07.176 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:07.183 eqi = None
2025-07-01 05:48:07.190
2025-07-01 05:48:07.197 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:07.203 # identical
2025-07-01 05:48:07.208
2025-07-01 05:48:07.214 # pump out diffs from before the synch point
2025-07-01 05:48:07.225 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:07.233
2025-07-01 05:48:07.240 # do intraline marking on the synch pair
2025-07-01 05:48:07.246 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:07.255 if eqi is None:
2025-07-01 05:48:07.263 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:07.270 atags = btags = ""
2025-07-01 05:48:07.278 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:07.290 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:07.300 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:07.312 if tag == 'replace':
2025-07-01 05:48:07.325 atags += '^' * la
2025-07-01 05:48:07.335 btags += '^' * lb
2025-07-01 05:48:07.348 elif tag == 'delete':
2025-07-01 05:48:07.358 atags += '-' * la
2025-07-01 05:48:07.367 elif tag == 'insert':
2025-07-01 05:48:07.374 btags += '+' * lb
2025-07-01 05:48:07.380 elif tag == 'equal':
2025-07-01 05:48:07.386 atags += ' ' * la
2025-07-01 05:48:07.392 btags += ' ' * lb
2025-07-01 05:48:07.402 else:
2025-07-01 05:48:07.414 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:07.424 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:07.430 else:
2025-07-01 05:48:07.443 # the synch pair is identical
2025-07-01 05:48:07.449 yield ' ' + aelt
2025-07-01 05:48:07.455
2025-07-01 05:48:07.461 # pump out diffs from after the synch point
2025-07-01 05:48:07.468 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:07.475
2025-07-01 05:48:07.481 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:07.487 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:07.493
2025-07-01 05:48:07.499 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:07.505 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:07.511 alo = 278, ahi = 1101
2025-07-01 05:48:07.517 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:07.523 blo = 278, bhi = 1101
2025-07-01 05:48:07.528
2025-07-01 05:48:07.534 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:07.540 g = []
2025-07-01 05:48:07.546 if alo < ahi:
2025-07-01 05:48:07.555 if blo < bhi:
2025-07-01 05:48:07.564 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:07.570 else:
2025-07-01 05:48:07.576 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:07.582 elif blo < bhi:
2025-07-01 05:48:07.587 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:07.593
2025-07-01 05:48:07.599 > yield from g
2025-07-01 05:48:07.605
2025-07-01 05:48:07.611 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:07.617 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:07.623
2025-07-01 05:48:07.628 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:07.633 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:07.639 alo = 278, ahi = 1101
2025-07-01 05:48:07.645 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:07.651 blo = 278, bhi = 1101
2025-07-01 05:48:07.657
2025-07-01 05:48:07.662 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:07.668 r"""
2025-07-01 05:48:07.674 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:07.680 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:07.685 synch point, and intraline difference marking is done on the
2025-07-01 05:48:07.691 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:07.697
2025-07-01 05:48:07.703 Example:
2025-07-01 05:48:07.708
2025-07-01 05:48:07.714 >>> d = Differ()
2025-07-01 05:48:07.720 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:07.726 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:07.732 >>> print(''.join(results), end="")
2025-07-01 05:48:07.737 - abcDefghiJkl
2025-07-01 05:48:07.749 + abcdefGhijkl
2025-07-01 05:48:07.760 """
2025-07-01 05:48:07.766
2025-07-01 05:48:07.772 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:07.778 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:07.783 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:07.789 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:07.795 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:07.800
2025-07-01 05:48:07.806 # search for the pair that matches best without being identical
2025-07-01 05:48:07.812 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:07.817 # on junk -- unless we have to)
2025-07-01 05:48:07.823 for j in range(blo, bhi):
2025-07-01 05:48:07.829 bj = b[j]
2025-07-01 05:48:07.834 cruncher.set_seq2(bj)
2025-07-01 05:48:07.843 for i in range(alo, ahi):
2025-07-01 05:48:07.854 ai = a[i]
2025-07-01 05:48:07.862 if ai == bj:
2025-07-01 05:48:07.868 if eqi is None:
2025-07-01 05:48:07.879 eqi, eqj = i, j
2025-07-01 05:48:07.887 continue
2025-07-01 05:48:07.898 cruncher.set_seq1(ai)
2025-07-01 05:48:07.906 # computing similarity is expensive, so use the quick
2025-07-01 05:48:07.913 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:07.919 # compares by a factor of 3.
2025-07-01 05:48:07.926 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:07.933 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:07.940 # of the computation is cached by cruncher
2025-07-01 05:48:07.947 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:07.953 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:07.964 cruncher.ratio() > best_ratio:
2025-07-01 05:48:07.972 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:07.981 if best_ratio < cutoff:
2025-07-01 05:48:07.987 # no non-identical "pretty close" pair
2025-07-01 05:48:07.992 if eqi is None:
2025-07-01 05:48:07.996 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:08.003 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:08.016 return
2025-07-01 05:48:08.028 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:08.038 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:08.047 else:
2025-07-01 05:48:08.055 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:08.069 eqi = None
2025-07-01 05:48:08.080
2025-07-01 05:48:08.091 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:08.103 # identical
2025-07-01 05:48:08.116
2025-07-01 05:48:08.128 # pump out diffs from before the synch point
2025-07-01 05:48:08.140 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:08.152
2025-07-01 05:48:08.163 # do intraline marking on the synch pair
2025-07-01 05:48:08.178 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:08.185 if eqi is None:
2025-07-01 05:48:08.192 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:08.199 atags = btags = ""
2025-07-01 05:48:08.205 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:08.217 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:08.227 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:08.234 if tag == 'replace':
2025-07-01 05:48:08.241 atags += '^' * la
2025-07-01 05:48:08.248 btags += '^' * lb
2025-07-01 05:48:08.254 elif tag == 'delete':
2025-07-01 05:48:08.264 atags += '-' * la
2025-07-01 05:48:08.274 elif tag == 'insert':
2025-07-01 05:48:08.282 btags += '+' * lb
2025-07-01 05:48:08.291 elif tag == 'equal':
2025-07-01 05:48:08.304 atags += ' ' * la
2025-07-01 05:48:08.314 btags += ' ' * lb
2025-07-01 05:48:08.323 else:
2025-07-01 05:48:08.331 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:08.340 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:08.351 else:
2025-07-01 05:48:08.360 # the synch pair is identical
2025-07-01 05:48:08.370 yield ' ' + aelt
2025-07-01 05:48:08.384
2025-07-01 05:48:08.392 # pump out diffs from after the synch point
2025-07-01 05:48:08.399 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:08.404
2025-07-01 05:48:08.408 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:08.413 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:08.417
2025-07-01 05:48:08.422 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:08.436 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:08.447 alo = 279, ahi = 1101
2025-07-01 05:48:08.459 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:08.472 blo = 279, bhi = 1101
2025-07-01 05:48:08.481
2025-07-01 05:48:08.487 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:08.495 g = []
2025-07-01 05:48:08.504 if alo < ahi:
2025-07-01 05:48:08.518 if blo < bhi:
2025-07-01 05:48:08.527 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:08.539 else:
2025-07-01 05:48:08.549 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:08.560 elif blo < bhi:
2025-07-01 05:48:08.568 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:08.573
2025-07-01 05:48:08.578 > yield from g
2025-07-01 05:48:08.583
2025-07-01 05:48:08.588 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:08.594 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:08.599
2025-07-01 05:48:08.607 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:08.615 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:08.621 alo = 279, ahi = 1101
2025-07-01 05:48:08.632 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:08.639 blo = 279, bhi = 1101
2025-07-01 05:48:08.646
2025-07-01 05:48:08.653 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:08.658 r"""
2025-07-01 05:48:08.663 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:08.668 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:08.674 synch point, and intraline difference marking is done on the
2025-07-01 05:48:08.681 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:08.688
2025-07-01 05:48:08.694 Example:
2025-07-01 05:48:08.702
2025-07-01 05:48:08.710 >>> d = Differ()
2025-07-01 05:48:08.718 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:08.725 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:08.732 >>> print(''.join(results), end="")
2025-07-01 05:48:08.738 - abcDefghiJkl
2025-07-01 05:48:08.752 + abcdefGhijkl
2025-07-01 05:48:08.763 """
2025-07-01 05:48:08.769
2025-07-01 05:48:08.782 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:08.788 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:08.794 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:08.800 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:08.806 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:08.811
2025-07-01 05:48:08.818 # search for the pair that matches best without being identical
2025-07-01 05:48:08.825 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:08.831 # on junk -- unless we have to)
2025-07-01 05:48:08.837 for j in range(blo, bhi):
2025-07-01 05:48:08.842 bj = b[j]
2025-07-01 05:48:08.848 cruncher.set_seq2(bj)
2025-07-01 05:48:08.854 for i in range(alo, ahi):
2025-07-01 05:48:08.860 ai = a[i]
2025-07-01 05:48:08.865 if ai == bj:
2025-07-01 05:48:08.872 if eqi is None:
2025-07-01 05:48:08.879 eqi, eqj = i, j
2025-07-01 05:48:08.886 continue
2025-07-01 05:48:08.896 cruncher.set_seq1(ai)
2025-07-01 05:48:08.906 # computing similarity is expensive, so use the quick
2025-07-01 05:48:08.922 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:08.932 # compares by a factor of 3.
2025-07-01 05:48:08.940 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:08.946 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:08.953 # of the computation is cached by cruncher
2025-07-01 05:48:08.958 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:08.963 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:08.967 cruncher.ratio() > best_ratio:
2025-07-01 05:48:08.972 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:08.977 if best_ratio < cutoff:
2025-07-01 05:48:08.982 # no non-identical "pretty close" pair
2025-07-01 05:48:08.992 if eqi is None:
2025-07-01 05:48:09.002 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:09.011 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:09.019 return
2025-07-01 05:48:09.030 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:09.038 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:09.046 else:
2025-07-01 05:48:09.053 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:09.059 eqi = None
2025-07-01 05:48:09.064
2025-07-01 05:48:09.070 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:09.076 # identical
2025-07-01 05:48:09.081
2025-07-01 05:48:09.087 # pump out diffs from before the synch point
2025-07-01 05:48:09.093 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:09.098
2025-07-01 05:48:09.104 # do intraline marking on the synch pair
2025-07-01 05:48:09.109 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:09.115 if eqi is None:
2025-07-01 05:48:09.121 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:09.127 atags = btags = ""
2025-07-01 05:48:09.132 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:09.138 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:09.144 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:09.150 if tag == 'replace':
2025-07-01 05:48:09.161 atags += '^' * la
2025-07-01 05:48:09.171 btags += '^' * lb
2025-07-01 05:48:09.179 elif tag == 'delete':
2025-07-01 05:48:09.186 atags += '-' * la
2025-07-01 05:48:09.192 elif tag == 'insert':
2025-07-01 05:48:09.198 btags += '+' * lb
2025-07-01 05:48:09.204 elif tag == 'equal':
2025-07-01 05:48:09.210 atags += ' ' * la
2025-07-01 05:48:09.214 btags += ' ' * lb
2025-07-01 05:48:09.218 else:
2025-07-01 05:48:09.223 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:09.228 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:09.232 else:
2025-07-01 05:48:09.238 # the synch pair is identical
2025-07-01 05:48:09.247 yield ' ' + aelt
2025-07-01 05:48:09.255
2025-07-01 05:48:09.262 # pump out diffs from after the synch point
2025-07-01 05:48:09.268 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:09.274
2025-07-01 05:48:09.280 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:09.286 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:09.292
2025-07-01 05:48:09.302 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:09.314 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:09.323 alo = 280, ahi = 1101
2025-07-01 05:48:09.333 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:09.339 blo = 280, bhi = 1101
2025-07-01 05:48:09.345
2025-07-01 05:48:09.354 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:09.362 g = []
2025-07-01 05:48:09.368 if alo < ahi:
2025-07-01 05:48:09.374 if blo < bhi:
2025-07-01 05:48:09.378 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:09.383 else:
2025-07-01 05:48:09.387 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:09.392 elif blo < bhi:
2025-07-01 05:48:09.396 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:09.401
2025-07-01 05:48:09.405 > yield from g
2025-07-01 05:48:09.409
2025-07-01 05:48:09.414 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:09.418 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:09.423
2025-07-01 05:48:09.427 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:09.432 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:09.437 alo = 280, ahi = 1101
2025-07-01 05:48:09.441 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:09.446 blo = 280, bhi = 1101
2025-07-01 05:48:09.450
2025-07-01 05:48:09.461 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:09.471 r"""
2025-07-01 05:48:09.483 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:09.493 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:09.505 synch point, and intraline difference marking is done on the
2025-07-01 05:48:09.517 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:09.529
2025-07-01 05:48:09.540 Example:
2025-07-01 05:48:09.548
2025-07-01 05:48:09.556 >>> d = Differ()
2025-07-01 05:48:09.563 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:09.569 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:09.576 >>> print(''.join(results), end="")
2025-07-01 05:48:09.581 - abcDefghiJkl
2025-07-01 05:48:09.593 + abcdefGhijkl
2025-07-01 05:48:09.604 """
2025-07-01 05:48:09.610
2025-07-01 05:48:09.620 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:09.630 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:09.637 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:09.643 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:09.651 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:09.662
2025-07-01 05:48:09.671 # search for the pair that matches best without being identical
2025-07-01 05:48:09.679 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:09.685 # on junk -- unless we have to)
2025-07-01 05:48:09.696 for j in range(blo, bhi):
2025-07-01 05:48:09.707 bj = b[j]
2025-07-01 05:48:09.715 cruncher.set_seq2(bj)
2025-07-01 05:48:09.727 for i in range(alo, ahi):
2025-07-01 05:48:09.737 ai = a[i]
2025-07-01 05:48:09.744 if ai == bj:
2025-07-01 05:48:09.752 if eqi is None:
2025-07-01 05:48:09.763 eqi, eqj = i, j
2025-07-01 05:48:09.775 continue
2025-07-01 05:48:09.786 cruncher.set_seq1(ai)
2025-07-01 05:48:09.795 # computing similarity is expensive, so use the quick
2025-07-01 05:48:09.807 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:09.819 # compares by a factor of 3.
2025-07-01 05:48:09.829 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:09.837 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:09.844 # of the computation is cached by cruncher
2025-07-01 05:48:09.850 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:09.858 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:09.866 cruncher.ratio() > best_ratio:
2025-07-01 05:48:09.874 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:09.885 if best_ratio < cutoff:
2025-07-01 05:48:09.895 # no non-identical "pretty close" pair
2025-07-01 05:48:09.907 if eqi is None:
2025-07-01 05:48:09.920 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:09.930 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:09.937 return
2025-07-01 05:48:09.944 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:09.950 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:09.960 else:
2025-07-01 05:48:09.970 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:09.982 eqi = None
2025-07-01 05:48:09.991
2025-07-01 05:48:10.002 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:10.014 # identical
2025-07-01 05:48:10.025
2025-07-01 05:48:10.035 # pump out diffs from before the synch point
2025-07-01 05:48:10.042 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:10.048
2025-07-01 05:48:10.054 # do intraline marking on the synch pair
2025-07-01 05:48:10.062 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:10.069 if eqi is None:
2025-07-01 05:48:10.075 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:10.081 atags = btags = ""
2025-07-01 05:48:10.086 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:10.098 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:10.109 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:10.118 if tag == 'replace':
2025-07-01 05:48:10.125 atags += '^' * la
2025-07-01 05:48:10.136 btags += '^' * lb
2025-07-01 05:48:10.147 elif tag == 'delete':
2025-07-01 05:48:10.155 atags += '-' * la
2025-07-01 05:48:10.162 elif tag == 'insert':
2025-07-01 05:48:10.168 btags += '+' * lb
2025-07-01 05:48:10.174 elif tag == 'equal':
2025-07-01 05:48:10.180 atags += ' ' * la
2025-07-01 05:48:10.186 btags += ' ' * lb
2025-07-01 05:48:10.191 else:
2025-07-01 05:48:10.198 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:10.208 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:10.216 else:
2025-07-01 05:48:10.224 # the synch pair is identical
2025-07-01 05:48:10.231 yield ' ' + aelt
2025-07-01 05:48:10.241
2025-07-01 05:48:10.249 # pump out diffs from after the synch point
2025-07-01 05:48:10.257 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:10.263
2025-07-01 05:48:10.271 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:10.281 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:10.290
2025-07-01 05:48:10.298 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:10.306 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:10.317 alo = 281, ahi = 1101
2025-07-01 05:48:10.328 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:10.336 blo = 281, bhi = 1101
2025-07-01 05:48:10.343
2025-07-01 05:48:10.351 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:10.361 g = []
2025-07-01 05:48:10.370 if alo < ahi:
2025-07-01 05:48:10.377 if blo < bhi:
2025-07-01 05:48:10.384 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:10.390 else:
2025-07-01 05:48:10.401 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:10.411 elif blo < bhi:
2025-07-01 05:48:10.419 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:10.428
2025-07-01 05:48:10.437 > yield from g
2025-07-01 05:48:10.444
2025-07-01 05:48:10.451 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:10.459 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:10.467
2025-07-01 05:48:10.478 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:10.487 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:10.493 alo = 281, ahi = 1101
2025-07-01 05:48:10.503 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:10.511 blo = 281, bhi = 1101
2025-07-01 05:48:10.518
2025-07-01 05:48:10.524 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:10.530 r"""
2025-07-01 05:48:10.537 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:10.544 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:10.552 synch point, and intraline difference marking is done on the
2025-07-01 05:48:10.559 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:10.565
2025-07-01 05:48:10.572 Example:
2025-07-01 05:48:10.579
2025-07-01 05:48:10.585 >>> d = Differ()
2025-07-01 05:48:10.590 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:10.596 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:10.602 >>> print(''.join(results), end="")
2025-07-01 05:48:10.607 - abcDefghiJkl
2025-07-01 05:48:10.618 + abcdefGhijkl
2025-07-01 05:48:10.639 """
2025-07-01 05:48:10.647
2025-07-01 05:48:10.655 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:10.666 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:10.675 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:10.684 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:10.694 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:10.700
2025-07-01 05:48:10.706 # search for the pair that matches best without being identical
2025-07-01 05:48:10.712 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:10.718 # on junk -- unless we have to)
2025-07-01 05:48:10.729 for j in range(blo, bhi):
2025-07-01 05:48:10.740 bj = b[j]
2025-07-01 05:48:10.748 cruncher.set_seq2(bj)
2025-07-01 05:48:10.755 for i in range(alo, ahi):
2025-07-01 05:48:10.767 ai = a[i]
2025-07-01 05:48:10.774 if ai == bj:
2025-07-01 05:48:10.781 if eqi is None:
2025-07-01 05:48:10.788 eqi, eqj = i, j
2025-07-01 05:48:10.795 continue
2025-07-01 05:48:10.806 cruncher.set_seq1(ai)
2025-07-01 05:48:10.818 # computing similarity is expensive, so use the quick
2025-07-01 05:48:10.830 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:10.840 # compares by a factor of 3.
2025-07-01 05:48:10.848 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:10.855 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:10.862 # of the computation is cached by cruncher
2025-07-01 05:48:10.868 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:10.874 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:10.883 cruncher.ratio() > best_ratio:
2025-07-01 05:48:10.895 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:10.905 if best_ratio < cutoff:
2025-07-01 05:48:10.913 # no non-identical "pretty close" pair
2025-07-01 05:48:10.920 if eqi is None:
2025-07-01 05:48:10.926 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:10.938 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:10.945 return
2025-07-01 05:48:10.954 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:10.967 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:10.977 else:
2025-07-01 05:48:10.987 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:10.995 eqi = None
2025-07-01 05:48:11.007
2025-07-01 05:48:11.018 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:11.023 # identical
2025-07-01 05:48:11.029
2025-07-01 05:48:11.036 # pump out diffs from before the synch point
2025-07-01 05:48:11.045 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:11.057
2025-07-01 05:48:11.068 # do intraline marking on the synch pair
2025-07-01 05:48:11.078 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:11.090 if eqi is None:
2025-07-01 05:48:11.102 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:11.115 atags = btags = ""
2025-07-01 05:48:11.123 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:11.130 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:11.137 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:11.145 if tag == 'replace':
2025-07-01 05:48:11.151 atags += '^' * la
2025-07-01 05:48:11.158 btags += '^' * lb
2025-07-01 05:48:11.168 elif tag == 'delete':
2025-07-01 05:48:11.177 atags += '-' * la
2025-07-01 05:48:11.186 elif tag == 'insert':
2025-07-01 05:48:11.200 btags += '+' * lb
2025-07-01 05:48:11.213 elif tag == 'equal':
2025-07-01 05:48:11.224 atags += ' ' * la
2025-07-01 05:48:11.237 btags += ' ' * lb
2025-07-01 05:48:11.249 else:
2025-07-01 05:48:11.260 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:11.268 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:11.275 else:
2025-07-01 05:48:11.282 # the synch pair is identical
2025-07-01 05:48:11.293 yield ' ' + aelt
2025-07-01 05:48:11.307
2025-07-01 05:48:11.316 # pump out diffs from after the synch point
2025-07-01 05:48:11.323 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:11.329
2025-07-01 05:48:11.334 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:11.346 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:11.354
2025-07-01 05:48:11.361 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:11.371 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:11.382 alo = 282, ahi = 1101
2025-07-01 05:48:11.392 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:11.402 blo = 282, bhi = 1101
2025-07-01 05:48:11.408
2025-07-01 05:48:11.414 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:11.420 g = []
2025-07-01 05:48:11.426 if alo < ahi:
2025-07-01 05:48:11.433 if blo < bhi:
2025-07-01 05:48:11.439 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:11.448 else:
2025-07-01 05:48:11.456 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:11.461 elif blo < bhi:
2025-07-01 05:48:11.466 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:11.472
2025-07-01 05:48:11.478 > yield from g
2025-07-01 05:48:11.484
2025-07-01 05:48:11.491 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:11.499 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:11.508
2025-07-01 05:48:11.519 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:11.530 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:11.537 alo = 282, ahi = 1101
2025-07-01 05:48:11.551 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:11.561 blo = 282, bhi = 1101
2025-07-01 05:48:11.569
2025-07-01 05:48:11.576 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:11.583 r"""
2025-07-01 05:48:11.591 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:11.599 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:11.609 synch point, and intraline difference marking is done on the
2025-07-01 05:48:11.622 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:11.634
2025-07-01 05:48:11.643 Example:
2025-07-01 05:48:11.649
2025-07-01 05:48:11.657 >>> d = Differ()
2025-07-01 05:48:11.664 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:11.670 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:11.676 >>> print(''.join(results), end="")
2025-07-01 05:48:11.682 - abcDefghiJkl
2025-07-01 05:48:11.694 + abcdefGhijkl
2025-07-01 05:48:11.705 """
2025-07-01 05:48:11.711
2025-07-01 05:48:11.717 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:11.723 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:11.729 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:11.735 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:11.743 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:11.756
2025-07-01 05:48:11.770 # search for the pair that matches best without being identical
2025-07-01 05:48:11.777 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:11.785 # on junk -- unless we have to)
2025-07-01 05:48:11.792 for j in range(blo, bhi):
2025-07-01 05:48:11.798 bj = b[j]
2025-07-01 05:48:11.803 cruncher.set_seq2(bj)
2025-07-01 05:48:11.809 for i in range(alo, ahi):
2025-07-01 05:48:11.815 ai = a[i]
2025-07-01 05:48:11.822 if ai == bj:
2025-07-01 05:48:11.831 if eqi is None:
2025-07-01 05:48:11.839 eqi, eqj = i, j
2025-07-01 05:48:11.851 continue
2025-07-01 05:48:11.863 cruncher.set_seq1(ai)
2025-07-01 05:48:11.872 # computing similarity is expensive, so use the quick
2025-07-01 05:48:11.882 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:11.890 # compares by a factor of 3.
2025-07-01 05:48:11.898 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:11.905 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:11.917 # of the computation is cached by cruncher
2025-07-01 05:48:11.928 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:11.937 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:11.945 cruncher.ratio() > best_ratio:
2025-07-01 05:48:11.953 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:11.961 if best_ratio < cutoff:
2025-07-01 05:48:11.969 # no non-identical "pretty close" pair
2025-07-01 05:48:11.975 if eqi is None:
2025-07-01 05:48:11.983 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:11.994 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:12.003 return
2025-07-01 05:48:12.016 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:12.028 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:12.038 else:
2025-07-01 05:48:12.046 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:12.055 eqi = None
2025-07-01 05:48:12.064
2025-07-01 05:48:12.072 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:12.079 # identical
2025-07-01 05:48:12.085
2025-07-01 05:48:12.091 # pump out diffs from before the synch point
2025-07-01 05:48:12.098 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:12.104
2025-07-01 05:48:12.111 # do intraline marking on the synch pair
2025-07-01 05:48:12.118 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:12.124 if eqi is None:
2025-07-01 05:48:12.131 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:12.144 atags = btags = ""
2025-07-01 05:48:12.158 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:12.171 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:12.182 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:12.191 if tag == 'replace':
2025-07-01 05:48:12.201 atags += '^' * la
2025-07-01 05:48:12.211 btags += '^' * lb
2025-07-01 05:48:12.220 elif tag == 'delete':
2025-07-01 05:48:12.226 atags += '-' * la
2025-07-01 05:48:12.236 elif tag == 'insert':
2025-07-01 05:48:12.246 btags += '+' * lb
2025-07-01 05:48:12.254 elif tag == 'equal':
2025-07-01 05:48:12.263 atags += ' ' * la
2025-07-01 05:48:12.271 btags += ' ' * lb
2025-07-01 05:48:12.277 else:
2025-07-01 05:48:12.284 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:12.291 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:12.299 else:
2025-07-01 05:48:12.311 # the synch pair is identical
2025-07-01 05:48:12.320 yield ' ' + aelt
2025-07-01 05:48:12.327
2025-07-01 05:48:12.340 # pump out diffs from after the synch point
2025-07-01 05:48:12.352 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:12.360
2025-07-01 05:48:12.368 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:12.378 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:12.389
2025-07-01 05:48:12.399 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:12.409 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:12.418 alo = 283, ahi = 1101
2025-07-01 05:48:12.431 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:12.440 blo = 283, bhi = 1101
2025-07-01 05:48:12.447
2025-07-01 05:48:12.455 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:12.461 g = []
2025-07-01 05:48:12.467 if alo < ahi:
2025-07-01 05:48:12.475 if blo < bhi:
2025-07-01 05:48:12.481 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:12.486 else:
2025-07-01 05:48:12.492 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:12.503 elif blo < bhi:
2025-07-01 05:48:12.515 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:12.524
2025-07-01 05:48:12.532 > yield from g
2025-07-01 05:48:12.541
2025-07-01 05:48:12.552 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:12.561 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:12.574
2025-07-01 05:48:12.586 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:12.597 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:12.607 alo = 283, ahi = 1101
2025-07-01 05:48:12.623 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:12.636 blo = 283, bhi = 1101
2025-07-01 05:48:12.648
2025-07-01 05:48:12.660 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:12.670 r"""
2025-07-01 05:48:12.678 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:12.686 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:12.693 synch point, and intraline difference marking is done on the
2025-07-01 05:48:12.699 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:12.704
2025-07-01 05:48:12.710 Example:
2025-07-01 05:48:12.723
2025-07-01 05:48:12.734 >>> d = Differ()
2025-07-01 05:48:12.747 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:12.756 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:12.768 >>> print(''.join(results), end="")
2025-07-01 05:48:12.774 - abcDefghiJkl
2025-07-01 05:48:12.785 + abcdefGhijkl
2025-07-01 05:48:12.806 """
2025-07-01 05:48:12.813
2025-07-01 05:48:12.821 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:12.827 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:12.832 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:12.838 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:12.843 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:12.848
2025-07-01 05:48:12.854 # search for the pair that matches best without being identical
2025-07-01 05:48:12.859 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:12.865 # on junk -- unless we have to)
2025-07-01 05:48:12.871 for j in range(blo, bhi):
2025-07-01 05:48:12.877 bj = b[j]
2025-07-01 05:48:12.884 cruncher.set_seq2(bj)
2025-07-01 05:48:12.891 for i in range(alo, ahi):
2025-07-01 05:48:12.897 ai = a[i]
2025-07-01 05:48:12.904 if ai == bj:
2025-07-01 05:48:12.911 if eqi is None:
2025-07-01 05:48:12.918 eqi, eqj = i, j
2025-07-01 05:48:12.929 continue
2025-07-01 05:48:12.939 cruncher.set_seq1(ai)
2025-07-01 05:48:12.948 # computing similarity is expensive, so use the quick
2025-07-01 05:48:12.955 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:12.960 # compares by a factor of 3.
2025-07-01 05:48:12.965 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:12.970 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:12.975 # of the computation is cached by cruncher
2025-07-01 05:48:12.981 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:12.986 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:12.994 cruncher.ratio() > best_ratio:
2025-07-01 05:48:13.000 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:13.006 if best_ratio < cutoff:
2025-07-01 05:48:13.014 # no non-identical "pretty close" pair
2025-07-01 05:48:13.022 if eqi is None:
2025-07-01 05:48:13.029 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:13.035 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:13.041 return
2025-07-01 05:48:13.046 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:13.052 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:13.058 else:
2025-07-01 05:48:13.066 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:13.072 eqi = None
2025-07-01 05:48:13.079
2025-07-01 05:48:13.083 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:13.088 # identical
2025-07-01 05:48:13.093
2025-07-01 05:48:13.098 # pump out diffs from before the synch point
2025-07-01 05:48:13.103 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:13.108
2025-07-01 05:48:13.113 # do intraline marking on the synch pair
2025-07-01 05:48:13.119 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:13.130 if eqi is None:
2025-07-01 05:48:13.137 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:13.148 atags = btags = ""
2025-07-01 05:48:13.155 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:13.162 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:13.169 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:13.176 if tag == 'replace':
2025-07-01 05:48:13.183 atags += '^' * la
2025-07-01 05:48:13.192 btags += '^' * lb
2025-07-01 05:48:13.200 elif tag == 'delete':
2025-07-01 05:48:13.210 atags += '-' * la
2025-07-01 05:48:13.220 elif tag == 'insert':
2025-07-01 05:48:13.228 btags += '+' * lb
2025-07-01 05:48:13.235 elif tag == 'equal':
2025-07-01 05:48:13.243 atags += ' ' * la
2025-07-01 05:48:13.254 btags += ' ' * lb
2025-07-01 05:48:13.261 else:
2025-07-01 05:48:13.267 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:13.273 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:13.279 else:
2025-07-01 05:48:13.285 # the synch pair is identical
2025-07-01 05:48:13.290 yield ' ' + aelt
2025-07-01 05:48:13.296
2025-07-01 05:48:13.302 # pump out diffs from after the synch point
2025-07-01 05:48:13.307 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:13.313
2025-07-01 05:48:13.318 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:13.324 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:13.330
2025-07-01 05:48:13.336 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:13.342 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:13.348 alo = 284, ahi = 1101
2025-07-01 05:48:13.354 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:13.360 blo = 284, bhi = 1101
2025-07-01 05:48:13.365
2025-07-01 05:48:13.371 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:13.376 g = []
2025-07-01 05:48:13.382 if alo < ahi:
2025-07-01 05:48:13.388 if blo < bhi:
2025-07-01 05:48:13.393 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:13.399 else:
2025-07-01 05:48:13.405 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:13.410 elif blo < bhi:
2025-07-01 05:48:13.415 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:13.423
2025-07-01 05:48:13.434 > yield from g
2025-07-01 05:48:13.444
2025-07-01 05:48:13.454 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:13.462 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:13.468
2025-07-01 05:48:13.475 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:13.484 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:13.491 alo = 284, ahi = 1101
2025-07-01 05:48:13.498 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:13.508 blo = 284, bhi = 1101
2025-07-01 05:48:13.518
2025-07-01 05:48:13.530 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:13.540 r"""
2025-07-01 05:48:13.547 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:13.554 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:13.559 synch point, and intraline difference marking is done on the
2025-07-01 05:48:13.564 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:13.570
2025-07-01 05:48:13.576 Example:
2025-07-01 05:48:13.581
2025-07-01 05:48:13.588 >>> d = Differ()
2025-07-01 05:48:13.595 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:13.601 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:13.607 >>> print(''.join(results), end="")
2025-07-01 05:48:13.612 - abcDefghiJkl
2025-07-01 05:48:13.622 + abcdefGhijkl
2025-07-01 05:48:13.633 """
2025-07-01 05:48:13.645
2025-07-01 05:48:13.658 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:13.670 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:13.682 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:13.689 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:13.696 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:13.702
2025-07-01 05:48:13.711 # search for the pair that matches best without being identical
2025-07-01 05:48:13.723 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:13.735 # on junk -- unless we have to)
2025-07-01 05:48:13.742 for j in range(blo, bhi):
2025-07-01 05:48:13.749 bj = b[j]
2025-07-01 05:48:13.756 cruncher.set_seq2(bj)
2025-07-01 05:48:13.764 for i in range(alo, ahi):
2025-07-01 05:48:13.770 ai = a[i]
2025-07-01 05:48:13.776 if ai == bj:
2025-07-01 05:48:13.783 if eqi is None:
2025-07-01 05:48:13.791 eqi, eqj = i, j
2025-07-01 05:48:13.801 continue
2025-07-01 05:48:13.810 cruncher.set_seq1(ai)
2025-07-01 05:48:13.816 # computing similarity is expensive, so use the quick
2025-07-01 05:48:13.822 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:13.827 # compares by a factor of 3.
2025-07-01 05:48:13.833 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:13.838 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:13.844 # of the computation is cached by cruncher
2025-07-01 05:48:13.855 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:13.864 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:13.877 cruncher.ratio() > best_ratio:
2025-07-01 05:48:13.888 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:13.900 if best_ratio < cutoff:
2025-07-01 05:48:13.911 # no non-identical "pretty close" pair
2025-07-01 05:48:13.924 if eqi is None:
2025-07-01 05:48:13.936 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:13.944 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:13.952 return
2025-07-01 05:48:13.963 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:13.973 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:13.980 else:
2025-07-01 05:48:13.986 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:13.992 eqi = None
2025-07-01 05:48:13.999
2025-07-01 05:48:14.011 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:14.019 # identical
2025-07-01 05:48:14.026
2025-07-01 05:48:14.035 # pump out diffs from before the synch point
2025-07-01 05:48:14.045 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:14.056
2025-07-01 05:48:14.064 # do intraline marking on the synch pair
2025-07-01 05:48:14.072 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:14.078 if eqi is None:
2025-07-01 05:48:14.092 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:14.104 atags = btags = ""
2025-07-01 05:48:14.113 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:14.120 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:14.131 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:14.144 if tag == 'replace':
2025-07-01 05:48:14.156 atags += '^' * la
2025-07-01 05:48:14.164 btags += '^' * lb
2025-07-01 05:48:14.173 elif tag == 'delete':
2025-07-01 05:48:14.185 atags += '-' * la
2025-07-01 05:48:14.194 elif tag == 'insert':
2025-07-01 05:48:14.200 btags += '+' * lb
2025-07-01 05:48:14.207 elif tag == 'equal':
2025-07-01 05:48:14.213 atags += ' ' * la
2025-07-01 05:48:14.218 btags += ' ' * lb
2025-07-01 05:48:14.223 else:
2025-07-01 05:48:14.229 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:14.236 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:14.243 else:
2025-07-01 05:48:14.250 # the synch pair is identical
2025-07-01 05:48:14.259 yield ' ' + aelt
2025-07-01 05:48:14.270
2025-07-01 05:48:14.279 # pump out diffs from after the synch point
2025-07-01 05:48:14.287 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:14.295
2025-07-01 05:48:14.302 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:14.311 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:14.319
2025-07-01 05:48:14.327 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:14.333 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:14.345 alo = 285, ahi = 1101
2025-07-01 05:48:14.354 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:14.361 blo = 285, bhi = 1101
2025-07-01 05:48:14.367
2025-07-01 05:48:14.372 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:14.377 g = []
2025-07-01 05:48:14.382 if alo < ahi:
2025-07-01 05:48:14.387 if blo < bhi:
2025-07-01 05:48:14.391 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:14.396 else:
2025-07-01 05:48:14.401 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:14.406 elif blo < bhi:
2025-07-01 05:48:14.411 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:14.416
2025-07-01 05:48:14.420 > yield from g
2025-07-01 05:48:14.425
2025-07-01 05:48:14.430 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:14.435 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:14.439
2025-07-01 05:48:14.444 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:14.449 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:14.454 alo = 285, ahi = 1101
2025-07-01 05:48:14.462 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:14.468 blo = 285, bhi = 1101
2025-07-01 05:48:14.474
2025-07-01 05:48:14.479 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:14.485 r"""
2025-07-01 05:48:14.492 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:14.499 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:14.507 synch point, and intraline difference marking is done on the
2025-07-01 05:48:14.513 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:14.520
2025-07-01 05:48:14.526 Example:
2025-07-01 05:48:14.531
2025-07-01 05:48:14.537 >>> d = Differ()
2025-07-01 05:48:14.543 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:14.549 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:14.554 >>> print(''.join(results), end="")
2025-07-01 05:48:14.560 - abcDefghiJkl
2025-07-01 05:48:14.573 + abcdefGhijkl
2025-07-01 05:48:14.587 """
2025-07-01 05:48:14.593
2025-07-01 05:48:14.598 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:14.604 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:14.610 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:14.615 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:14.621 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:14.630
2025-07-01 05:48:14.638 # search for the pair that matches best without being identical
2025-07-01 05:48:14.644 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:14.651 # on junk -- unless we have to)
2025-07-01 05:48:14.657 for j in range(blo, bhi):
2025-07-01 05:48:14.662 bj = b[j]
2025-07-01 05:48:14.670 cruncher.set_seq2(bj)
2025-07-01 05:48:14.684 for i in range(alo, ahi):
2025-07-01 05:48:14.694 ai = a[i]
2025-07-01 05:48:14.701 if ai == bj:
2025-07-01 05:48:14.708 if eqi is None:
2025-07-01 05:48:14.714 eqi, eqj = i, j
2025-07-01 05:48:14.721 continue
2025-07-01 05:48:14.727 cruncher.set_seq1(ai)
2025-07-01 05:48:14.733 # computing similarity is expensive, so use the quick
2025-07-01 05:48:14.739 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:14.745 # compares by a factor of 3.
2025-07-01 05:48:14.751 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:14.757 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:14.763 # of the computation is cached by cruncher
2025-07-01 05:48:14.769 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:14.775 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:14.781 cruncher.ratio() > best_ratio:
2025-07-01 05:48:14.787 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:14.793 if best_ratio < cutoff:
2025-07-01 05:48:14.799 # no non-identical "pretty close" pair
2025-07-01 05:48:14.805 if eqi is None:
2025-07-01 05:48:14.811 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:14.817 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:14.823 return
2025-07-01 05:48:14.829 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:14.835 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:14.841 else:
2025-07-01 05:48:14.847 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:14.852 eqi = None
2025-07-01 05:48:14.858
2025-07-01 05:48:14.864 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:14.870 # identical
2025-07-01 05:48:14.875
2025-07-01 05:48:14.881 # pump out diffs from before the synch point
2025-07-01 05:48:14.889 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:14.900
2025-07-01 05:48:14.911 # do intraline marking on the synch pair
2025-07-01 05:48:14.918 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:14.924 if eqi is None:
2025-07-01 05:48:14.929 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:14.934 atags = btags = ""
2025-07-01 05:48:14.938 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:14.943 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:14.948 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:14.953 if tag == 'replace':
2025-07-01 05:48:14.958 atags += '^' * la
2025-07-01 05:48:14.963 btags += '^' * lb
2025-07-01 05:48:14.968 elif tag == 'delete':
2025-07-01 05:48:14.972 atags += '-' * la
2025-07-01 05:48:14.977 elif tag == 'insert':
2025-07-01 05:48:14.981 btags += '+' * lb
2025-07-01 05:48:14.986 elif tag == 'equal':
2025-07-01 05:48:14.991 atags += ' ' * la
2025-07-01 05:48:14.998 btags += ' ' * lb
2025-07-01 05:48:15.005 else:
2025-07-01 05:48:15.012 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:15.018 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:15.025 else:
2025-07-01 05:48:15.032 # the synch pair is identical
2025-07-01 05:48:15.038 yield ' ' + aelt
2025-07-01 05:48:15.045
2025-07-01 05:48:15.052 # pump out diffs from after the synch point
2025-07-01 05:48:15.058 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:15.063
2025-07-01 05:48:15.069 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:15.075 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:15.083
2025-07-01 05:48:15.094 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:15.105 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:15.111 alo = 286, ahi = 1101
2025-07-01 05:48:15.117 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:15.122 blo = 286, bhi = 1101
2025-07-01 05:48:15.128
2025-07-01 05:48:15.134 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:15.140 g = []
2025-07-01 05:48:15.147 if alo < ahi:
2025-07-01 05:48:15.155 if blo < bhi:
2025-07-01 05:48:15.166 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:15.175 else:
2025-07-01 05:48:15.181 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:15.187 elif blo < bhi:
2025-07-01 05:48:15.192 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:15.197
2025-07-01 05:48:15.202 > yield from g
2025-07-01 05:48:15.208
2025-07-01 05:48:15.213 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:15.220 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:15.225
2025-07-01 05:48:15.231 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:15.237 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:15.243 alo = 286, ahi = 1101
2025-07-01 05:48:15.249 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:15.255 blo = 286, bhi = 1101
2025-07-01 05:48:15.260
2025-07-01 05:48:15.266 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:15.272 r"""
2025-07-01 05:48:15.278 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:15.284 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:15.290 synch point, and intraline difference marking is done on the
2025-07-01 05:48:15.296 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:15.302
2025-07-01 05:48:15.308 Example:
2025-07-01 05:48:15.314
2025-07-01 05:48:15.320 >>> d = Differ()
2025-07-01 05:48:15.326 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:15.331 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:15.337 >>> print(''.join(results), end="")
2025-07-01 05:48:15.343 - abcDefghiJkl
2025-07-01 05:48:15.354 + abcdefGhijkl
2025-07-01 05:48:15.365 """
2025-07-01 05:48:15.370
2025-07-01 05:48:15.376 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:15.382 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:15.388 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:15.393 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:15.399 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:15.404
2025-07-01 05:48:15.411 # search for the pair that matches best without being identical
2025-07-01 05:48:15.416 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:15.421 # on junk -- unless we have to)
2025-07-01 05:48:15.425 for j in range(blo, bhi):
2025-07-01 05:48:15.429 bj = b[j]
2025-07-01 05:48:15.434 cruncher.set_seq2(bj)
2025-07-01 05:48:15.438 for i in range(alo, ahi):
2025-07-01 05:48:15.443 ai = a[i]
2025-07-01 05:48:15.448 if ai == bj:
2025-07-01 05:48:15.454 if eqi is None:
2025-07-01 05:48:15.462 eqi, eqj = i, j
2025-07-01 05:48:15.469 continue
2025-07-01 05:48:15.477 cruncher.set_seq1(ai)
2025-07-01 05:48:15.483 # computing similarity is expensive, so use the quick
2025-07-01 05:48:15.490 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:15.496 # compares by a factor of 3.
2025-07-01 05:48:15.501 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:15.507 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:15.513 # of the computation is cached by cruncher
2025-07-01 05:48:15.519 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:15.525 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:15.530 cruncher.ratio() > best_ratio:
2025-07-01 05:48:15.536 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:15.542 if best_ratio < cutoff:
2025-07-01 05:48:15.547 # no non-identical "pretty close" pair
2025-07-01 05:48:15.553 if eqi is None:
2025-07-01 05:48:15.559 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:15.565 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:15.570 return
2025-07-01 05:48:15.576 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:15.580 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:15.585 else:
2025-07-01 05:48:15.591 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:15.597 eqi = None
2025-07-01 05:48:15.602
2025-07-01 05:48:15.609 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:15.616 # identical
2025-07-01 05:48:15.623
2025-07-01 05:48:15.629 # pump out diffs from before the synch point
2025-07-01 05:48:15.635 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:15.640
2025-07-01 05:48:15.646 # do intraline marking on the synch pair
2025-07-01 05:48:15.658 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:15.670 if eqi is None:
2025-07-01 05:48:15.680 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:15.687 atags = btags = ""
2025-07-01 05:48:15.693 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:15.701 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:15.706 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:15.714 if tag == 'replace':
2025-07-01 05:48:15.723 atags += '^' * la
2025-07-01 05:48:15.730 btags += '^' * lb
2025-07-01 05:48:15.736 elif tag == 'delete':
2025-07-01 05:48:15.743 atags += '-' * la
2025-07-01 05:48:15.749 elif tag == 'insert':
2025-07-01 05:48:15.756 btags += '+' * lb
2025-07-01 05:48:15.763 elif tag == 'equal':
2025-07-01 05:48:15.771 atags += ' ' * la
2025-07-01 05:48:15.782 btags += ' ' * lb
2025-07-01 05:48:15.791 else:
2025-07-01 05:48:15.797 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:15.804 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:15.809 else:
2025-07-01 05:48:15.814 # the synch pair is identical
2025-07-01 05:48:15.819 yield ' ' + aelt
2025-07-01 05:48:15.824
2025-07-01 05:48:15.830 # pump out diffs from after the synch point
2025-07-01 05:48:15.836 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:15.842
2025-07-01 05:48:15.848 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:15.855 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:15.862
2025-07-01 05:48:15.873 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:15.882 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:15.889 alo = 287, ahi = 1101
2025-07-01 05:48:15.900 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:15.912 blo = 287, bhi = 1101
2025-07-01 05:48:15.921
2025-07-01 05:48:15.933 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:15.945 g = []
2025-07-01 05:48:15.954 if alo < ahi:
2025-07-01 05:48:15.965 if blo < bhi:
2025-07-01 05:48:15.975 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:15.983 else:
2025-07-01 05:48:15.990 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:16.001 elif blo < bhi:
2025-07-01 05:48:16.011 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:16.019
2025-07-01 05:48:16.026 > yield from g
2025-07-01 05:48:16.033
2025-07-01 05:48:16.039 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:16.053 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:16.065
2025-07-01 05:48:16.075 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:16.091 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:16.103 alo = 287, ahi = 1101
2025-07-01 05:48:16.112 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:16.120 blo = 287, bhi = 1101
2025-07-01 05:48:16.126
2025-07-01 05:48:16.133 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:16.138 r"""
2025-07-01 05:48:16.148 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:16.157 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:16.164 synch point, and intraline difference marking is done on the
2025-07-01 05:48:16.171 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:16.177
2025-07-01 05:48:16.187 Example:
2025-07-01 05:48:16.198
2025-07-01 05:48:16.208 >>> d = Differ()
2025-07-01 05:48:16.221 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:16.233 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:16.244 >>> print(''.join(results), end="")
2025-07-01 05:48:16.253 - abcDefghiJkl
2025-07-01 05:48:16.277 + abcdefGhijkl
2025-07-01 05:48:16.296 """
2025-07-01 05:48:16.309
2025-07-01 05:48:16.319 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:16.329 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:16.335 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:16.340 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:16.345 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:16.350
2025-07-01 05:48:16.355 # search for the pair that matches best without being identical
2025-07-01 05:48:16.359 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:16.364 # on junk -- unless we have to)
2025-07-01 05:48:16.369 for j in range(blo, bhi):
2025-07-01 05:48:16.373 bj = b[j]
2025-07-01 05:48:16.378 cruncher.set_seq2(bj)
2025-07-01 05:48:16.382 for i in range(alo, ahi):
2025-07-01 05:48:16.390 ai = a[i]
2025-07-01 05:48:16.403 if ai == bj:
2025-07-01 05:48:16.415 if eqi is None:
2025-07-01 05:48:16.421 eqi, eqj = i, j
2025-07-01 05:48:16.428 continue
2025-07-01 05:48:16.434 cruncher.set_seq1(ai)
2025-07-01 05:48:16.439 # computing similarity is expensive, so use the quick
2025-07-01 05:48:16.444 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:16.448 # compares by a factor of 3.
2025-07-01 05:48:16.457 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:16.467 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:16.474 # of the computation is cached by cruncher
2025-07-01 05:48:16.483 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:16.495 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:16.505 cruncher.ratio() > best_ratio:
2025-07-01 05:48:16.516 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:16.527 if best_ratio < cutoff:
2025-07-01 05:48:16.538 # no non-identical "pretty close" pair
2025-07-01 05:48:16.548 if eqi is None:
2025-07-01 05:48:16.557 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:16.569 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:16.582 return
2025-07-01 05:48:16.591 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:16.599 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:16.606 else:
2025-07-01 05:48:16.614 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:16.622 eqi = None
2025-07-01 05:48:16.630
2025-07-01 05:48:16.639 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:16.647 # identical
2025-07-01 05:48:16.653
2025-07-01 05:48:16.660 # pump out diffs from before the synch point
2025-07-01 05:48:16.666 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:16.672
2025-07-01 05:48:16.679 # do intraline marking on the synch pair
2025-07-01 05:48:16.686 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:16.692 if eqi is None:
2025-07-01 05:48:16.704 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:16.713 atags = btags = ""
2025-07-01 05:48:16.720 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:16.726 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:16.732 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:16.738 if tag == 'replace':
2025-07-01 05:48:16.748 atags += '^' * la
2025-07-01 05:48:16.759 btags += '^' * lb
2025-07-01 05:48:16.765 elif tag == 'delete':
2025-07-01 05:48:16.777 atags += '-' * la
2025-07-01 05:48:16.785 elif tag == 'insert':
2025-07-01 05:48:16.791 btags += '+' * lb
2025-07-01 05:48:16.798 elif tag == 'equal':
2025-07-01 05:48:16.805 atags += ' ' * la
2025-07-01 05:48:16.812 btags += ' ' * lb
2025-07-01 05:48:16.818 else:
2025-07-01 05:48:16.826 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:16.833 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:16.840 else:
2025-07-01 05:48:16.847 # the synch pair is identical
2025-07-01 05:48:16.855 yield ' ' + aelt
2025-07-01 05:48:16.867
2025-07-01 05:48:16.873 # pump out diffs from after the synch point
2025-07-01 05:48:16.879 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:16.885
2025-07-01 05:48:16.891 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:16.897 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:16.902
2025-07-01 05:48:16.908 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:16.915 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:16.921 alo = 290, ahi = 1101
2025-07-01 05:48:16.932 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:16.943 blo = 290, bhi = 1101
2025-07-01 05:48:16.952
2025-07-01 05:48:16.963 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:16.972 g = []
2025-07-01 05:48:16.983 if alo < ahi:
2025-07-01 05:48:16.993 if blo < bhi:
2025-07-01 05:48:17.000 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:17.007 else:
2025-07-01 05:48:17.013 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:17.021 elif blo < bhi:
2025-07-01 05:48:17.031 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:17.040
2025-07-01 05:48:17.047 > yield from g
2025-07-01 05:48:17.054
2025-07-01 05:48:17.064 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:17.073 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:17.080
2025-07-01 05:48:17.086 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:17.092 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:17.098 alo = 290, ahi = 1101
2025-07-01 05:48:17.104 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:17.110 blo = 290, bhi = 1101
2025-07-01 05:48:17.119
2025-07-01 05:48:17.125 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:17.131 r"""
2025-07-01 05:48:17.137 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:17.144 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:17.151 synch point, and intraline difference marking is done on the
2025-07-01 05:48:17.162 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:17.171
2025-07-01 05:48:17.178 Example:
2025-07-01 05:48:17.184
2025-07-01 05:48:17.189 >>> d = Differ()
2025-07-01 05:48:17.195 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:17.202 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:17.211 >>> print(''.join(results), end="")
2025-07-01 05:48:17.219 - abcDefghiJkl
2025-07-01 05:48:17.231 + abcdefGhijkl
2025-07-01 05:48:17.240 """
2025-07-01 05:48:17.244
2025-07-01 05:48:17.249 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:17.255 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:17.261 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:17.268 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:17.275 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:17.284
2025-07-01 05:48:17.296 # search for the pair that matches best without being identical
2025-07-01 05:48:17.305 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:17.312 # on junk -- unless we have to)
2025-07-01 05:48:17.319 for j in range(blo, bhi):
2025-07-01 05:48:17.325 bj = b[j]
2025-07-01 05:48:17.332 cruncher.set_seq2(bj)
2025-07-01 05:48:17.339 for i in range(alo, ahi):
2025-07-01 05:48:17.345 ai = a[i]
2025-07-01 05:48:17.352 if ai == bj:
2025-07-01 05:48:17.358 if eqi is None:
2025-07-01 05:48:17.366 eqi, eqj = i, j
2025-07-01 05:48:17.377 continue
2025-07-01 05:48:17.388 cruncher.set_seq1(ai)
2025-07-01 05:48:17.395 # computing similarity is expensive, so use the quick
2025-07-01 05:48:17.401 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:17.410 # compares by a factor of 3.
2025-07-01 05:48:17.422 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:17.434 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:17.445 # of the computation is cached by cruncher
2025-07-01 05:48:17.452 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:17.459 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:17.469 cruncher.ratio() > best_ratio:
2025-07-01 05:48:17.483 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:17.494 if best_ratio < cutoff:
2025-07-01 05:48:17.505 # no non-identical "pretty close" pair
2025-07-01 05:48:17.513 if eqi is None:
2025-07-01 05:48:17.522 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:17.529 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:17.536 return
2025-07-01 05:48:17.543 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:17.550 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:17.555 else:
2025-07-01 05:48:17.560 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:17.564 eqi = None
2025-07-01 05:48:17.570
2025-07-01 05:48:17.576 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:17.582 # identical
2025-07-01 05:48:17.588
2025-07-01 05:48:17.595 # pump out diffs from before the synch point
2025-07-01 05:48:17.605 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:17.617
2025-07-01 05:48:17.629 # do intraline marking on the synch pair
2025-07-01 05:48:17.639 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:17.647 if eqi is None:
2025-07-01 05:48:17.653 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:17.660 atags = btags = ""
2025-07-01 05:48:17.667 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:17.675 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:17.682 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:17.689 if tag == 'replace':
2025-07-01 05:48:17.695 atags += '^' * la
2025-07-01 05:48:17.702 btags += '^' * lb
2025-07-01 05:48:17.709 elif tag == 'delete':
2025-07-01 05:48:17.715 atags += '-' * la
2025-07-01 05:48:17.722 elif tag == 'insert':
2025-07-01 05:48:17.731 btags += '+' * lb
2025-07-01 05:48:17.740 elif tag == 'equal':
2025-07-01 05:48:17.749 atags += ' ' * la
2025-07-01 05:48:17.756 btags += ' ' * lb
2025-07-01 05:48:17.763 else:
2025-07-01 05:48:17.769 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:17.776 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:17.781 else:
2025-07-01 05:48:17.787 # the synch pair is identical
2025-07-01 05:48:17.795 yield ' ' + aelt
2025-07-01 05:48:17.807
2025-07-01 05:48:17.817 # pump out diffs from after the synch point
2025-07-01 05:48:17.825 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:17.832
2025-07-01 05:48:17.838 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:17.844 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:17.850
2025-07-01 05:48:17.860 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:17.870 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:17.877 alo = 291, ahi = 1101
2025-07-01 05:48:17.885 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:17.891 blo = 291, bhi = 1101
2025-07-01 05:48:17.896
2025-07-01 05:48:17.903 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:17.910 g = []
2025-07-01 05:48:17.916 if alo < ahi:
2025-07-01 05:48:17.924 if blo < bhi:
2025-07-01 05:48:17.934 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:17.942 else:
2025-07-01 05:48:17.951 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:17.962 elif blo < bhi:
2025-07-01 05:48:17.971 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:17.979
2025-07-01 05:48:17.990 > yield from g
2025-07-01 05:48:17.999
2025-07-01 05:48:18.009 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:18.017 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:18.024
2025-07-01 05:48:18.032 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:18.039 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:18.046 alo = 291, ahi = 1101
2025-07-01 05:48:18.059 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:18.068 blo = 291, bhi = 1101
2025-07-01 05:48:18.075
2025-07-01 05:48:18.083 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:18.094 r"""
2025-07-01 05:48:18.104 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:18.115 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:18.125 synch point, and intraline difference marking is done on the
2025-07-01 05:48:18.135 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:18.145
2025-07-01 05:48:18.154 Example:
2025-07-01 05:48:18.164
2025-07-01 05:48:18.176 >>> d = Differ()
2025-07-01 05:48:18.185 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:18.193 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:18.199 >>> print(''.join(results), end="")
2025-07-01 05:48:18.204 - abcDefghiJkl
2025-07-01 05:48:18.214 + abcdefGhijkl
2025-07-01 05:48:18.225 """
2025-07-01 05:48:18.233
2025-07-01 05:48:18.241 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:18.248 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:18.255 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:18.263 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:18.275 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:18.282
2025-07-01 05:48:18.288 # search for the pair that matches best without being identical
2025-07-01 05:48:18.294 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:18.299 # on junk -- unless we have to)
2025-07-01 05:48:18.304 for j in range(blo, bhi):
2025-07-01 05:48:18.309 bj = b[j]
2025-07-01 05:48:18.314 cruncher.set_seq2(bj)
2025-07-01 05:48:18.318 for i in range(alo, ahi):
2025-07-01 05:48:18.324 ai = a[i]
2025-07-01 05:48:18.330 if ai == bj:
2025-07-01 05:48:18.336 if eqi is None:
2025-07-01 05:48:18.341 eqi, eqj = i, j
2025-07-01 05:48:18.348 continue
2025-07-01 05:48:18.354 cruncher.set_seq1(ai)
2025-07-01 05:48:18.360 # computing similarity is expensive, so use the quick
2025-07-01 05:48:18.365 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:18.369 # compares by a factor of 3.
2025-07-01 05:48:18.374 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:18.380 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:18.386 # of the computation is cached by cruncher
2025-07-01 05:48:18.392 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:18.398 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:18.407 cruncher.ratio() > best_ratio:
2025-07-01 05:48:18.416 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:18.424 if best_ratio < cutoff:
2025-07-01 05:48:18.432 # no non-identical "pretty close" pair
2025-07-01 05:48:18.440 if eqi is None:
2025-07-01 05:48:18.447 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:18.454 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:18.462 return
2025-07-01 05:48:18.467 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:18.472 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:18.477 else:
2025-07-01 05:48:18.482 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:18.487 eqi = None
2025-07-01 05:48:18.492
2025-07-01 05:48:18.501 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:18.507 # identical
2025-07-01 05:48:18.514
2025-07-01 05:48:18.521 # pump out diffs from before the synch point
2025-07-01 05:48:18.528 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:18.535
2025-07-01 05:48:18.542 # do intraline marking on the synch pair
2025-07-01 05:48:18.549 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:18.556 if eqi is None:
2025-07-01 05:48:18.562 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:18.568 atags = btags = ""
2025-07-01 05:48:18.575 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:18.581 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:18.586 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:18.592 if tag == 'replace':
2025-07-01 05:48:18.600 atags += '^' * la
2025-07-01 05:48:18.609 btags += '^' * lb
2025-07-01 05:48:18.617 elif tag == 'delete':
2025-07-01 05:48:18.623 atags += '-' * la
2025-07-01 05:48:18.629 elif tag == 'insert':
2025-07-01 05:48:18.634 btags += '+' * lb
2025-07-01 05:48:18.638 elif tag == 'equal':
2025-07-01 05:48:18.643 atags += ' ' * la
2025-07-01 05:48:18.648 btags += ' ' * lb
2025-07-01 05:48:18.653 else:
2025-07-01 05:48:18.657 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:18.662 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:18.667 else:
2025-07-01 05:48:18.675 # the synch pair is identical
2025-07-01 05:48:18.687 yield ' ' + aelt
2025-07-01 05:48:18.696
2025-07-01 05:48:18.704 # pump out diffs from after the synch point
2025-07-01 05:48:18.711 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:18.717
2025-07-01 05:48:18.723 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:18.730 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:18.735
2025-07-01 05:48:18.742 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:18.754 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:18.762 alo = 292, ahi = 1101
2025-07-01 05:48:18.769 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:18.776 blo = 292, bhi = 1101
2025-07-01 05:48:18.788
2025-07-01 05:48:18.799 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:18.808 g = []
2025-07-01 05:48:18.819 if alo < ahi:
2025-07-01 05:48:18.828 if blo < bhi:
2025-07-01 05:48:18.837 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:18.844 else:
2025-07-01 05:48:18.851 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:18.859 elif blo < bhi:
2025-07-01 05:48:18.870 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:18.879
2025-07-01 05:48:18.887 > yield from g
2025-07-01 05:48:18.893
2025-07-01 05:48:18.900 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:18.906 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:18.911
2025-07-01 05:48:18.919 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:18.930 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:18.938 alo = 292, ahi = 1101
2025-07-01 05:48:18.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:18.951 blo = 292, bhi = 1101
2025-07-01 05:48:18.956
2025-07-01 05:48:18.962 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:18.972 r"""
2025-07-01 05:48:18.981 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:18.989 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:18.995 synch point, and intraline difference marking is done on the
2025-07-01 05:48:19.002 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:19.007
2025-07-01 05:48:19.015 Example:
2025-07-01 05:48:19.026
2025-07-01 05:48:19.034 >>> d = Differ()
2025-07-01 05:48:19.043 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:19.055 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:19.067 >>> print(''.join(results), end="")
2025-07-01 05:48:19.078 - abcDefghiJkl
2025-07-01 05:48:19.096 + abcdefGhijkl
2025-07-01 05:48:19.109 """
2025-07-01 05:48:19.115
2025-07-01 05:48:19.121 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:19.127 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:19.133 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:19.138 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:19.144 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:19.150
2025-07-01 05:48:19.155 # search for the pair that matches best without being identical
2025-07-01 05:48:19.167 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:19.175 # on junk -- unless we have to)
2025-07-01 05:48:19.183 for j in range(blo, bhi):
2025-07-01 05:48:19.189 bj = b[j]
2025-07-01 05:48:19.195 cruncher.set_seq2(bj)
2025-07-01 05:48:19.201 for i in range(alo, ahi):
2025-07-01 05:48:19.206 ai = a[i]
2025-07-01 05:48:19.211 if ai == bj:
2025-07-01 05:48:19.220 if eqi is None:
2025-07-01 05:48:19.230 eqi, eqj = i, j
2025-07-01 05:48:19.237 continue
2025-07-01 05:48:19.244 cruncher.set_seq1(ai)
2025-07-01 05:48:19.256 # computing similarity is expensive, so use the quick
2025-07-01 05:48:19.268 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:19.279 # compares by a factor of 3.
2025-07-01 05:48:19.293 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:19.303 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:19.310 # of the computation is cached by cruncher
2025-07-01 05:48:19.320 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:19.331 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:19.340 cruncher.ratio() > best_ratio:
2025-07-01 05:48:19.348 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:19.359 if best_ratio < cutoff:
2025-07-01 05:48:19.367 # no non-identical "pretty close" pair
2025-07-01 05:48:19.375 if eqi is None:
2025-07-01 05:48:19.386 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:19.395 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:19.407 return
2025-07-01 05:48:19.418 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:19.427 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:19.435 else:
2025-07-01 05:48:19.442 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:19.453 eqi = None
2025-07-01 05:48:19.463
2025-07-01 05:48:19.472 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:19.479 # identical
2025-07-01 05:48:19.486
2025-07-01 05:48:19.492 # pump out diffs from before the synch point
2025-07-01 05:48:19.498 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:19.503
2025-07-01 05:48:19.513 # do intraline marking on the synch pair
2025-07-01 05:48:19.525 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:19.535 if eqi is None:
2025-07-01 05:48:19.546 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:19.555 atags = btags = ""
2025-07-01 05:48:19.563 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:19.573 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:19.582 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:19.589 if tag == 'replace':
2025-07-01 05:48:19.595 atags += '^' * la
2025-07-01 05:48:19.600 btags += '^' * lb
2025-07-01 05:48:19.605 elif tag == 'delete':
2025-07-01 05:48:19.611 atags += '-' * la
2025-07-01 05:48:19.616 elif tag == 'insert':
2025-07-01 05:48:19.626 btags += '+' * lb
2025-07-01 05:48:19.634 elif tag == 'equal':
2025-07-01 05:48:19.643 atags += ' ' * la
2025-07-01 05:48:19.650 btags += ' ' * lb
2025-07-01 05:48:19.657 else:
2025-07-01 05:48:19.668 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:19.678 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:19.688 else:
2025-07-01 05:48:19.697 # the synch pair is identical
2025-07-01 05:48:19.705 yield ' ' + aelt
2025-07-01 05:48:19.711
2025-07-01 05:48:19.720 # pump out diffs from after the synch point
2025-07-01 05:48:19.731 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:19.740
2025-07-01 05:48:19.747 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:19.753 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:19.760
2025-07-01 05:48:19.765 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:19.772 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:19.777 alo = 293, ahi = 1101
2025-07-01 05:48:19.782 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:19.788 blo = 293, bhi = 1101
2025-07-01 05:48:19.792
2025-07-01 05:48:19.796 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:19.801 g = []
2025-07-01 05:48:19.808 if alo < ahi:
2025-07-01 05:48:19.816 if blo < bhi:
2025-07-01 05:48:19.822 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:19.827 else:
2025-07-01 05:48:19.835 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:19.846 elif blo < bhi:
2025-07-01 05:48:19.858 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:19.868
2025-07-01 05:48:19.877 > yield from g
2025-07-01 05:48:19.884
2025-07-01 05:48:19.891 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:19.897 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:19.903
2025-07-01 05:48:19.909 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:19.921 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:19.928 alo = 293, ahi = 1101
2025-07-01 05:48:19.939 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:19.949 blo = 293, bhi = 1101
2025-07-01 05:48:19.959
2025-07-01 05:48:19.972 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:19.980 r"""
2025-07-01 05:48:19.988 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:19.994 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:20.000 synch point, and intraline difference marking is done on the
2025-07-01 05:48:20.008 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:20.015
2025-07-01 05:48:20.022 Example:
2025-07-01 05:48:20.029
2025-07-01 05:48:20.036 >>> d = Differ()
2025-07-01 05:48:20.044 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:20.054 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:20.065 >>> print(''.join(results), end="")
2025-07-01 05:48:20.072 - abcDefghiJkl
2025-07-01 05:48:20.087 + abcdefGhijkl
2025-07-01 05:48:20.105 """
2025-07-01 05:48:20.112
2025-07-01 05:48:20.119 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:20.126 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:20.137 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:20.146 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:20.154 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:20.165
2025-07-01 05:48:20.173 # search for the pair that matches best without being identical
2025-07-01 05:48:20.185 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:20.192 # on junk -- unless we have to)
2025-07-01 05:48:20.199 for j in range(blo, bhi):
2025-07-01 05:48:20.207 bj = b[j]
2025-07-01 05:48:20.218 cruncher.set_seq2(bj)
2025-07-01 05:48:20.228 for i in range(alo, ahi):
2025-07-01 05:48:20.237 ai = a[i]
2025-07-01 05:48:20.245 if ai == bj:
2025-07-01 05:48:20.251 if eqi is None:
2025-07-01 05:48:20.257 eqi, eqj = i, j
2025-07-01 05:48:20.262 continue
2025-07-01 05:48:20.272 cruncher.set_seq1(ai)
2025-07-01 05:48:20.283 # computing similarity is expensive, so use the quick
2025-07-01 05:48:20.291 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:20.299 # compares by a factor of 3.
2025-07-01 05:48:20.306 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:20.316 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:20.327 # of the computation is cached by cruncher
2025-07-01 05:48:20.336 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:20.348 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:20.359 cruncher.ratio() > best_ratio:
2025-07-01 05:48:20.367 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:20.375 if best_ratio < cutoff:
2025-07-01 05:48:20.382 # no non-identical "pretty close" pair
2025-07-01 05:48:20.388 if eqi is None:
2025-07-01 05:48:20.394 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:20.399 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:20.404 return
2025-07-01 05:48:20.409 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:20.415 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:20.420 else:
2025-07-01 05:48:20.426 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:20.433 eqi = None
2025-07-01 05:48:20.438
2025-07-01 05:48:20.445 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:20.455 # identical
2025-07-01 05:48:20.466
2025-07-01 05:48:20.475 # pump out diffs from before the synch point
2025-07-01 05:48:20.483 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:20.490
2025-07-01 05:48:20.499 # do intraline marking on the synch pair
2025-07-01 05:48:20.513 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:20.524 if eqi is None:
2025-07-01 05:48:20.536 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:20.545 atags = btags = ""
2025-07-01 05:48:20.557 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:20.571 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:20.581 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:20.594 if tag == 'replace':
2025-07-01 05:48:20.601 atags += '^' * la
2025-07-01 05:48:20.609 btags += '^' * lb
2025-07-01 05:48:20.616 elif tag == 'delete':
2025-07-01 05:48:20.623 atags += '-' * la
2025-07-01 05:48:20.630 elif tag == 'insert':
2025-07-01 05:48:20.638 btags += '+' * lb
2025-07-01 05:48:20.645 elif tag == 'equal':
2025-07-01 05:48:20.652 atags += ' ' * la
2025-07-01 05:48:20.659 btags += ' ' * lb
2025-07-01 05:48:20.666 else:
2025-07-01 05:48:20.675 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:20.686 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:20.694 else:
2025-07-01 05:48:20.700 # the synch pair is identical
2025-07-01 05:48:20.706 yield ' ' + aelt
2025-07-01 05:48:20.713
2025-07-01 05:48:20.720 # pump out diffs from after the synch point
2025-07-01 05:48:20.727 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:20.733
2025-07-01 05:48:20.740 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:20.747 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:20.754
2025-07-01 05:48:20.763 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:20.774 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:20.781 alo = 294, ahi = 1101
2025-07-01 05:48:20.789 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:20.797 blo = 294, bhi = 1101
2025-07-01 05:48:20.804
2025-07-01 05:48:20.810 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:20.817 g = []
2025-07-01 05:48:20.824 if alo < ahi:
2025-07-01 05:48:20.831 if blo < bhi:
2025-07-01 05:48:20.840 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:20.847 else:
2025-07-01 05:48:20.855 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:20.867 elif blo < bhi:
2025-07-01 05:48:20.877 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:20.886
2025-07-01 05:48:20.892 > yield from g
2025-07-01 05:48:20.898
2025-07-01 05:48:20.909 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:20.922 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:20.930
2025-07-01 05:48:20.939 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:20.952 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:20.963 alo = 294, ahi = 1101
2025-07-01 05:48:20.972 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:20.979 blo = 294, bhi = 1101
2025-07-01 05:48:20.985
2025-07-01 05:48:20.991 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:20.999 r"""
2025-07-01 05:48:21.007 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:21.014 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:21.025 synch point, and intraline difference marking is done on the
2025-07-01 05:48:21.037 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:21.047
2025-07-01 05:48:21.056 Example:
2025-07-01 05:48:21.063
2025-07-01 05:48:21.069 >>> d = Differ()
2025-07-01 05:48:21.077 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:21.087 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:21.095 >>> print(''.join(results), end="")
2025-07-01 05:48:21.102 - abcDefghiJkl
2025-07-01 05:48:21.122 + abcdefGhijkl
2025-07-01 05:48:21.137 """
2025-07-01 05:48:21.145
2025-07-01 05:48:21.153 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:21.158 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:21.163 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:21.168 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:21.173 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:21.178
2025-07-01 05:48:21.183 # search for the pair that matches best without being identical
2025-07-01 05:48:21.188 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:21.193 # on junk -- unless we have to)
2025-07-01 05:48:21.198 for j in range(blo, bhi):
2025-07-01 05:48:21.202 bj = b[j]
2025-07-01 05:48:21.208 cruncher.set_seq2(bj)
2025-07-01 05:48:21.214 for i in range(alo, ahi):
2025-07-01 05:48:21.226 ai = a[i]
2025-07-01 05:48:21.241 if ai == bj:
2025-07-01 05:48:21.250 if eqi is None:
2025-07-01 05:48:21.259 eqi, eqj = i, j
2025-07-01 05:48:21.270 continue
2025-07-01 05:48:21.278 cruncher.set_seq1(ai)
2025-07-01 05:48:21.284 # computing similarity is expensive, so use the quick
2025-07-01 05:48:21.291 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:21.298 # compares by a factor of 3.
2025-07-01 05:48:21.304 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:21.310 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:21.316 # of the computation is cached by cruncher
2025-07-01 05:48:21.322 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:21.329 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:21.338 cruncher.ratio() > best_ratio:
2025-07-01 05:48:21.345 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:21.350 if best_ratio < cutoff:
2025-07-01 05:48:21.356 # no non-identical "pretty close" pair
2025-07-01 05:48:21.362 if eqi is None:
2025-07-01 05:48:21.371 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:21.381 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:21.388 return
2025-07-01 05:48:21.395 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:21.402 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:21.408 else:
2025-07-01 05:48:21.420 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:21.427 eqi = None
2025-07-01 05:48:21.435
2025-07-01 05:48:21.446 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:21.454 # identical
2025-07-01 05:48:21.461
2025-07-01 05:48:21.468 # pump out diffs from before the synch point
2025-07-01 05:48:21.480 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:21.488
2025-07-01 05:48:21.497 # do intraline marking on the synch pair
2025-07-01 05:48:21.503 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:21.508 if eqi is None:
2025-07-01 05:48:21.514 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:21.520 atags = btags = ""
2025-07-01 05:48:21.526 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:21.536 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:21.546 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:21.555 if tag == 'replace':
2025-07-01 05:48:21.562 atags += '^' * la
2025-07-01 05:48:21.570 btags += '^' * lb
2025-07-01 05:48:21.576 elif tag == 'delete':
2025-07-01 05:48:21.585 atags += '-' * la
2025-07-01 05:48:21.596 elif tag == 'insert':
2025-07-01 05:48:21.606 btags += '+' * lb
2025-07-01 05:48:21.615 elif tag == 'equal':
2025-07-01 05:48:21.623 atags += ' ' * la
2025-07-01 05:48:21.631 btags += ' ' * lb
2025-07-01 05:48:21.644 else:
2025-07-01 05:48:21.653 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:21.665 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:21.675 else:
2025-07-01 05:48:21.688 # the synch pair is identical
2025-07-01 05:48:21.699 yield ' ' + aelt
2025-07-01 05:48:21.708
2025-07-01 05:48:21.717 # pump out diffs from after the synch point
2025-07-01 05:48:21.725 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:21.730
2025-07-01 05:48:21.735 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:21.743 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:21.753
2025-07-01 05:48:21.760 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:21.769 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:21.776 alo = 295, ahi = 1101
2025-07-01 05:48:21.788 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:21.797 blo = 295, bhi = 1101
2025-07-01 05:48:21.806
2025-07-01 05:48:21.816 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:21.824 g = []
2025-07-01 05:48:21.830 if alo < ahi:
2025-07-01 05:48:21.835 if blo < bhi:
2025-07-01 05:48:21.840 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:21.844 else:
2025-07-01 05:48:21.850 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:21.855 elif blo < bhi:
2025-07-01 05:48:21.862 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:21.868
2025-07-01 05:48:21.874 > yield from g
2025-07-01 05:48:21.884
2025-07-01 05:48:21.897 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:21.904 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:21.910
2025-07-01 05:48:21.919 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:21.930 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:21.938 alo = 295, ahi = 1101
2025-07-01 05:48:21.949 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:21.963 blo = 295, bhi = 1101
2025-07-01 05:48:21.973
2025-07-01 05:48:21.982 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:21.990 r"""
2025-07-01 05:48:22.002 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:22.014 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:22.026 synch point, and intraline difference marking is done on the
2025-07-01 05:48:22.035 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:22.043
2025-07-01 05:48:22.051 Example:
2025-07-01 05:48:22.063
2025-07-01 05:48:22.072 >>> d = Differ()
2025-07-01 05:48:22.080 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:22.090 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:22.099 >>> print(''.join(results), end="")
2025-07-01 05:48:22.111 - abcDefghiJkl
2025-07-01 05:48:22.128 + abcdefGhijkl
2025-07-01 05:48:22.139 """
2025-07-01 05:48:22.146
2025-07-01 05:48:22.156 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:22.165 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:22.172 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:22.179 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:22.186 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:22.197
2025-07-01 05:48:22.209 # search for the pair that matches best without being identical
2025-07-01 05:48:22.219 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:22.233 # on junk -- unless we have to)
2025-07-01 05:48:22.243 for j in range(blo, bhi):
2025-07-01 05:48:22.256 bj = b[j]
2025-07-01 05:48:22.267 cruncher.set_seq2(bj)
2025-07-01 05:48:22.277 for i in range(alo, ahi):
2025-07-01 05:48:22.285 ai = a[i]
2025-07-01 05:48:22.292 if ai == bj:
2025-07-01 05:48:22.298 if eqi is None:
2025-07-01 05:48:22.310 eqi, eqj = i, j
2025-07-01 05:48:22.319 continue
2025-07-01 05:48:22.328 cruncher.set_seq1(ai)
2025-07-01 05:48:22.334 # computing similarity is expensive, so use the quick
2025-07-01 05:48:22.340 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:22.347 # compares by a factor of 3.
2025-07-01 05:48:22.353 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:22.360 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:22.367 # of the computation is cached by cruncher
2025-07-01 05:48:22.374 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:22.380 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:22.386 cruncher.ratio() > best_ratio:
2025-07-01 05:48:22.396 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:22.408 if best_ratio < cutoff:
2025-07-01 05:48:22.419 # no non-identical "pretty close" pair
2025-07-01 05:48:22.427 if eqi is None:
2025-07-01 05:48:22.435 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:22.446 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:22.454 return
2025-07-01 05:48:22.461 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:22.467 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:22.472 else:
2025-07-01 05:48:22.477 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:22.481 eqi = None
2025-07-01 05:48:22.487
2025-07-01 05:48:22.495 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:22.506 # identical
2025-07-01 05:48:22.514
2025-07-01 05:48:22.520 # pump out diffs from before the synch point
2025-07-01 05:48:22.526 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:22.537
2025-07-01 05:48:22.548 # do intraline marking on the synch pair
2025-07-01 05:48:22.556 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:22.563 if eqi is None:
2025-07-01 05:48:22.570 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:22.576 atags = btags = ""
2025-07-01 05:48:22.587 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:22.596 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:22.604 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:22.618 if tag == 'replace':
2025-07-01 05:48:22.627 atags += '^' * la
2025-07-01 05:48:22.636 btags += '^' * lb
2025-07-01 05:48:22.645 elif tag == 'delete':
2025-07-01 05:48:22.657 atags += '-' * la
2025-07-01 05:48:22.666 elif tag == 'insert':
2025-07-01 05:48:22.675 btags += '+' * lb
2025-07-01 05:48:22.687 elif tag == 'equal':
2025-07-01 05:48:22.697 atags += ' ' * la
2025-07-01 05:48:22.705 btags += ' ' * lb
2025-07-01 05:48:22.717 else:
2025-07-01 05:48:22.729 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:22.742 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:22.751 else:
2025-07-01 05:48:22.760 # the synch pair is identical
2025-07-01 05:48:22.768 yield ' ' + aelt
2025-07-01 05:48:22.773
2025-07-01 05:48:22.779 # pump out diffs from after the synch point
2025-07-01 05:48:22.785 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:22.790
2025-07-01 05:48:22.800 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:22.810 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:22.818
2025-07-01 05:48:22.824 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:22.832 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:22.838 alo = 296, ahi = 1101
2025-07-01 05:48:22.849 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:22.857 blo = 296, bhi = 1101
2025-07-01 05:48:22.868
2025-07-01 05:48:22.875 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:22.881 g = []
2025-07-01 05:48:22.887 if alo < ahi:
2025-07-01 05:48:22.893 if blo < bhi:
2025-07-01 05:48:22.898 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:22.904 else:
2025-07-01 05:48:22.910 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:22.915 elif blo < bhi:
2025-07-01 05:48:22.922 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:22.928
2025-07-01 05:48:22.934 > yield from g
2025-07-01 05:48:22.941
2025-07-01 05:48:22.949 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:22.955 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:22.962
2025-07-01 05:48:22.973 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:22.984 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:22.992 alo = 296, ahi = 1101
2025-07-01 05:48:23.001 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:23.008 blo = 296, bhi = 1101
2025-07-01 05:48:23.020
2025-07-01 05:48:23.032 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:23.044 r"""
2025-07-01 05:48:23.056 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:23.066 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:23.075 synch point, and intraline difference marking is done on the
2025-07-01 05:48:23.085 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:23.095
2025-07-01 05:48:23.103 Example:
2025-07-01 05:48:23.110
2025-07-01 05:48:23.121 >>> d = Differ()
2025-07-01 05:48:23.131 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:23.139 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:23.147 >>> print(''.join(results), end="")
2025-07-01 05:48:23.153 - abcDefghiJkl
2025-07-01 05:48:23.164 + abcdefGhijkl
2025-07-01 05:48:23.177 """
2025-07-01 05:48:23.182
2025-07-01 05:48:23.191 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:23.203 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:23.213 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:23.224 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:23.237 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:23.248
2025-07-01 05:48:23.256 # search for the pair that matches best without being identical
2025-07-01 05:48:23.264 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:23.270 # on junk -- unless we have to)
2025-07-01 05:48:23.275 for j in range(blo, bhi):
2025-07-01 05:48:23.285 bj = b[j]
2025-07-01 05:48:23.298 cruncher.set_seq2(bj)
2025-07-01 05:48:23.305 for i in range(alo, ahi):
2025-07-01 05:48:23.312 ai = a[i]
2025-07-01 05:48:23.318 if ai == bj:
2025-07-01 05:48:23.325 if eqi is None:
2025-07-01 05:48:23.330 eqi, eqj = i, j
2025-07-01 05:48:23.335 continue
2025-07-01 05:48:23.339 cruncher.set_seq1(ai)
2025-07-01 05:48:23.344 # computing similarity is expensive, so use the quick
2025-07-01 05:48:23.349 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:23.354 # compares by a factor of 3.
2025-07-01 05:48:23.359 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:23.365 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:23.370 # of the computation is cached by cruncher
2025-07-01 05:48:23.376 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:23.382 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:23.391 cruncher.ratio() > best_ratio:
2025-07-01 05:48:23.400 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:23.407 if best_ratio < cutoff:
2025-07-01 05:48:23.414 # no non-identical "pretty close" pair
2025-07-01 05:48:23.420 if eqi is None:
2025-07-01 05:48:23.426 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:23.433 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:23.438 return
2025-07-01 05:48:23.444 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:23.450 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:23.455 else:
2025-07-01 05:48:23.460 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:23.464 eqi = None
2025-07-01 05:48:23.469
2025-07-01 05:48:23.475 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:23.481 # identical
2025-07-01 05:48:23.487
2025-07-01 05:48:23.494 # pump out diffs from before the synch point
2025-07-01 05:48:23.500 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:23.505
2025-07-01 05:48:23.513 # do intraline marking on the synch pair
2025-07-01 05:48:23.520 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:23.528 if eqi is None:
2025-07-01 05:48:23.535 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:23.543 atags = btags = ""
2025-07-01 05:48:23.550 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:23.557 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:23.564 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:23.571 if tag == 'replace':
2025-07-01 05:48:23.579 atags += '^' * la
2025-07-01 05:48:23.586 btags += '^' * lb
2025-07-01 05:48:23.597 elif tag == 'delete':
2025-07-01 05:48:23.607 atags += '-' * la
2025-07-01 05:48:23.614 elif tag == 'insert':
2025-07-01 05:48:23.621 btags += '+' * lb
2025-07-01 05:48:23.627 elif tag == 'equal':
2025-07-01 05:48:23.633 atags += ' ' * la
2025-07-01 05:48:23.639 btags += ' ' * lb
2025-07-01 05:48:23.646 else:
2025-07-01 05:48:23.654 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:23.661 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:23.666 else:
2025-07-01 05:48:23.672 # the synch pair is identical
2025-07-01 05:48:23.678 yield ' ' + aelt
2025-07-01 05:48:23.686
2025-07-01 05:48:23.698 # pump out diffs from after the synch point
2025-07-01 05:48:23.707 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:23.714
2025-07-01 05:48:23.722 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:23.729 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:23.735
2025-07-01 05:48:23.741 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:23.752 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:23.764 alo = 297, ahi = 1101
2025-07-01 05:48:23.772 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:23.787 blo = 297, bhi = 1101
2025-07-01 05:48:23.797
2025-07-01 05:48:23.810 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:23.820 g = []
2025-07-01 05:48:23.828 if alo < ahi:
2025-07-01 05:48:23.835 if blo < bhi:
2025-07-01 05:48:23.842 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:23.852 else:
2025-07-01 05:48:23.862 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:23.870 elif blo < bhi:
2025-07-01 05:48:23.881 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:23.890
2025-07-01 05:48:23.899 > yield from g
2025-07-01 05:48:23.907
2025-07-01 05:48:23.914 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:23.922 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:23.931
2025-07-01 05:48:23.940 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:23.952 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:23.964 alo = 297, ahi = 1101
2025-07-01 05:48:23.976 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:23.984 blo = 297, bhi = 1101
2025-07-01 05:48:23.991
2025-07-01 05:48:23.998 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:24.005 r"""
2025-07-01 05:48:24.017 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:24.028 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:24.035 synch point, and intraline difference marking is done on the
2025-07-01 05:48:24.043 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:24.051
2025-07-01 05:48:24.062 Example:
2025-07-01 05:48:24.072
2025-07-01 05:48:24.079 >>> d = Differ()
2025-07-01 05:48:24.085 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:24.092 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:24.098 >>> print(''.join(results), end="")
2025-07-01 05:48:24.103 - abcDefghiJkl
2025-07-01 05:48:24.121 + abcdefGhijkl
2025-07-01 05:48:24.144 """
2025-07-01 05:48:24.155
2025-07-01 05:48:24.165 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:24.175 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:24.183 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:24.195 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:24.204 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:24.210
2025-07-01 05:48:24.215 # search for the pair that matches best without being identical
2025-07-01 05:48:24.220 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:24.225 # on junk -- unless we have to)
2025-07-01 05:48:24.230 for j in range(blo, bhi):
2025-07-01 05:48:24.236 bj = b[j]
2025-07-01 05:48:24.242 cruncher.set_seq2(bj)
2025-07-01 05:48:24.253 for i in range(alo, ahi):
2025-07-01 05:48:24.264 ai = a[i]
2025-07-01 05:48:24.275 if ai == bj:
2025-07-01 05:48:24.283 if eqi is None:
2025-07-01 05:48:24.291 eqi, eqj = i, j
2025-07-01 05:48:24.298 continue
2025-07-01 05:48:24.306 cruncher.set_seq1(ai)
2025-07-01 05:48:24.312 # computing similarity is expensive, so use the quick
2025-07-01 05:48:24.319 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:24.325 # compares by a factor of 3.
2025-07-01 05:48:24.330 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:24.336 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:24.344 # of the computation is cached by cruncher
2025-07-01 05:48:24.351 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:24.358 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:24.365 cruncher.ratio() > best_ratio:
2025-07-01 05:48:24.373 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:24.379 if best_ratio < cutoff:
2025-07-01 05:48:24.386 # no non-identical "pretty close" pair
2025-07-01 05:48:24.396 if eqi is None:
2025-07-01 05:48:24.404 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:24.411 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:24.416 return
2025-07-01 05:48:24.422 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:24.428 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:24.435 else:
2025-07-01 05:48:24.443 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:24.454 eqi = None
2025-07-01 05:48:24.462
2025-07-01 05:48:24.471 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:24.480 # identical
2025-07-01 05:48:24.490
2025-07-01 05:48:24.496 # pump out diffs from before the synch point
2025-07-01 05:48:24.502 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:24.508
2025-07-01 05:48:24.515 # do intraline marking on the synch pair
2025-07-01 05:48:24.522 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:24.529 if eqi is None:
2025-07-01 05:48:24.536 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:24.542 atags = btags = ""
2025-07-01 05:48:24.549 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:24.556 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:24.563 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:24.570 if tag == 'replace':
2025-07-01 05:48:24.577 atags += '^' * la
2025-07-01 05:48:24.584 btags += '^' * lb
2025-07-01 05:48:24.590 elif tag == 'delete':
2025-07-01 05:48:24.597 atags += '-' * la
2025-07-01 05:48:24.604 elif tag == 'insert':
2025-07-01 05:48:24.611 btags += '+' * lb
2025-07-01 05:48:24.619 elif tag == 'equal':
2025-07-01 05:48:24.630 atags += ' ' * la
2025-07-01 05:48:24.638 btags += ' ' * lb
2025-07-01 05:48:24.645 else:
2025-07-01 05:48:24.650 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:24.657 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:24.662 else:
2025-07-01 05:48:24.668 # the synch pair is identical
2025-07-01 05:48:24.680 yield ' ' + aelt
2025-07-01 05:48:24.691
2025-07-01 05:48:24.702 # pump out diffs from after the synch point
2025-07-01 05:48:24.712 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:24.718
2025-07-01 05:48:24.724 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:24.730 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:24.739
2025-07-01 05:48:24.749 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:24.761 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:24.773 alo = 298, ahi = 1101
2025-07-01 05:48:24.782 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:24.792 blo = 298, bhi = 1101
2025-07-01 05:48:24.803
2025-07-01 05:48:24.812 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:24.820 g = []
2025-07-01 05:48:24.827 if alo < ahi:
2025-07-01 05:48:24.838 if blo < bhi:
2025-07-01 05:48:24.846 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:24.857 else:
2025-07-01 05:48:24.867 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:24.873 elif blo < bhi:
2025-07-01 05:48:24.879 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:24.885
2025-07-01 05:48:24.889 > yield from g
2025-07-01 05:48:24.895
2025-07-01 05:48:24.901 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:24.907 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:24.912
2025-07-01 05:48:24.919 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:24.925 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:24.930 alo = 298, ahi = 1101
2025-07-01 05:48:24.935 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:24.940 blo = 298, bhi = 1101
2025-07-01 05:48:24.944
2025-07-01 05:48:24.949 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:24.953 r"""
2025-07-01 05:48:24.958 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:24.962 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:24.967 synch point, and intraline difference marking is done on the
2025-07-01 05:48:24.972 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:24.978
2025-07-01 05:48:24.983 Example:
2025-07-01 05:48:24.988
2025-07-01 05:48:24.994 >>> d = Differ()
2025-07-01 05:48:25.002 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:25.009 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:25.015 >>> print(''.join(results), end="")
2025-07-01 05:48:25.021 - abcDefghiJkl
2025-07-01 05:48:25.032 + abcdefGhijkl
2025-07-01 05:48:25.043 """
2025-07-01 05:48:25.052
2025-07-01 05:48:25.062 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:25.073 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:25.082 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:25.090 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:25.097 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:25.104
2025-07-01 05:48:25.110 # search for the pair that matches best without being identical
2025-07-01 05:48:25.116 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:25.128 # on junk -- unless we have to)
2025-07-01 05:48:25.140 for j in range(blo, bhi):
2025-07-01 05:48:25.152 bj = b[j]
2025-07-01 05:48:25.163 cruncher.set_seq2(bj)
2025-07-01 05:48:25.176 for i in range(alo, ahi):
2025-07-01 05:48:25.188 ai = a[i]
2025-07-01 05:48:25.200 if ai == bj:
2025-07-01 05:48:25.209 if eqi is None:
2025-07-01 05:48:25.220 eqi, eqj = i, j
2025-07-01 05:48:25.230 continue
2025-07-01 05:48:25.242 cruncher.set_seq1(ai)
2025-07-01 05:48:25.252 # computing similarity is expensive, so use the quick
2025-07-01 05:48:25.263 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:25.275 # compares by a factor of 3.
2025-07-01 05:48:25.286 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:25.301 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:25.313 # of the computation is cached by cruncher
2025-07-01 05:48:25.323 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:25.329 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:25.338 cruncher.ratio() > best_ratio:
2025-07-01 05:48:25.345 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:25.352 if best_ratio < cutoff:
2025-07-01 05:48:25.360 # no non-identical "pretty close" pair
2025-07-01 05:48:25.367 if eqi is None:
2025-07-01 05:48:25.375 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:25.386 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:25.395 return
2025-07-01 05:48:25.401 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:25.408 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:25.414 else:
2025-07-01 05:48:25.426 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:25.436 eqi = None
2025-07-01 05:48:25.445
2025-07-01 05:48:25.452 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:25.458 # identical
2025-07-01 05:48:25.468
2025-07-01 05:48:25.479 # pump out diffs from before the synch point
2025-07-01 05:48:25.489 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:25.501
2025-07-01 05:48:25.511 # do intraline marking on the synch pair
2025-07-01 05:48:25.523 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:25.532 if eqi is None:
2025-07-01 05:48:25.540 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:25.546 atags = btags = ""
2025-07-01 05:48:25.553 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:25.559 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:25.565 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:25.576 if tag == 'replace':
2025-07-01 05:48:25.586 atags += '^' * la
2025-07-01 05:48:25.594 btags += '^' * lb
2025-07-01 05:48:25.600 elif tag == 'delete':
2025-07-01 05:48:25.605 atags += '-' * la
2025-07-01 05:48:25.610 elif tag == 'insert':
2025-07-01 05:48:25.614 btags += '+' * lb
2025-07-01 05:48:25.619 elif tag == 'equal':
2025-07-01 05:48:25.623 atags += ' ' * la
2025-07-01 05:48:25.627 btags += ' ' * lb
2025-07-01 05:48:25.632 else:
2025-07-01 05:48:25.636 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:25.641 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:25.645 else:
2025-07-01 05:48:25.649 # the synch pair is identical
2025-07-01 05:48:25.654 yield ' ' + aelt
2025-07-01 05:48:25.658
2025-07-01 05:48:25.662 # pump out diffs from after the synch point
2025-07-01 05:48:25.667 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:25.671
2025-07-01 05:48:25.677 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:25.682 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:25.688
2025-07-01 05:48:25.694 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:25.699 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:25.703 alo = 299, ahi = 1101
2025-07-01 05:48:25.708 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:25.713 blo = 299, bhi = 1101
2025-07-01 05:48:25.717
2025-07-01 05:48:25.721 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:25.726 g = []
2025-07-01 05:48:25.732 if alo < ahi:
2025-07-01 05:48:25.737 if blo < bhi:
2025-07-01 05:48:25.743 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:25.750 else:
2025-07-01 05:48:25.761 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:25.774 elif blo < bhi:
2025-07-01 05:48:25.784 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:25.791
2025-07-01 05:48:25.798 > yield from g
2025-07-01 05:48:25.807
2025-07-01 05:48:25.818 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:25.825 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:25.831
2025-07-01 05:48:25.837 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:25.847 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:25.858 alo = 299, ahi = 1101
2025-07-01 05:48:25.868 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:25.876 blo = 299, bhi = 1101
2025-07-01 05:48:25.881
2025-07-01 05:48:25.887 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:25.891 r"""
2025-07-01 05:48:25.896 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:25.901 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:25.907 synch point, and intraline difference marking is done on the
2025-07-01 05:48:25.913 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:25.919
2025-07-01 05:48:25.927 Example:
2025-07-01 05:48:25.938
2025-07-01 05:48:25.944 >>> d = Differ()
2025-07-01 05:48:25.950 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:25.955 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:25.960 >>> print(''.join(results), end="")
2025-07-01 05:48:25.965 - abcDefghiJkl
2025-07-01 05:48:25.974 + abcdefGhijkl
2025-07-01 05:48:25.984 """
2025-07-01 05:48:25.990
2025-07-01 05:48:25.996 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:26.002 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:26.010 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:26.018 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:26.026 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:26.035
2025-07-01 05:48:26.045 # search for the pair that matches best without being identical
2025-07-01 05:48:26.055 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:26.067 # on junk -- unless we have to)
2025-07-01 05:48:26.077 for j in range(blo, bhi):
2025-07-01 05:48:26.089 bj = b[j]
2025-07-01 05:48:26.098 cruncher.set_seq2(bj)
2025-07-01 05:48:26.105 for i in range(alo, ahi):
2025-07-01 05:48:26.111 ai = a[i]
2025-07-01 05:48:26.118 if ai == bj:
2025-07-01 05:48:26.127 if eqi is None:
2025-07-01 05:48:26.139 eqi, eqj = i, j
2025-07-01 05:48:26.147 continue
2025-07-01 05:48:26.153 cruncher.set_seq1(ai)
2025-07-01 05:48:26.162 # computing similarity is expensive, so use the quick
2025-07-01 05:48:26.172 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:26.185 # compares by a factor of 3.
2025-07-01 05:48:26.193 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:26.200 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:26.207 # of the computation is cached by cruncher
2025-07-01 05:48:26.214 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:26.221 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:26.226 cruncher.ratio() > best_ratio:
2025-07-01 05:48:26.231 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:26.236 if best_ratio < cutoff:
2025-07-01 05:48:26.241 # no non-identical "pretty close" pair
2025-07-01 05:48:26.246 if eqi is None:
2025-07-01 05:48:26.251 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:26.256 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:26.263 return
2025-07-01 05:48:26.274 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:26.283 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:26.290 else:
2025-07-01 05:48:26.299 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:26.306 eqi = None
2025-07-01 05:48:26.312
2025-07-01 05:48:26.319 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:26.325 # identical
2025-07-01 05:48:26.331
2025-07-01 05:48:26.339 # pump out diffs from before the synch point
2025-07-01 05:48:26.350 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:26.359
2025-07-01 05:48:26.369 # do intraline marking on the synch pair
2025-07-01 05:48:26.377 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:26.387 if eqi is None:
2025-07-01 05:48:26.395 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:26.402 atags = btags = ""
2025-07-01 05:48:26.407 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:26.412 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:26.417 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:26.421 if tag == 'replace':
2025-07-01 05:48:26.426 atags += '^' * la
2025-07-01 05:48:26.432 btags += '^' * lb
2025-07-01 05:48:26.437 elif tag == 'delete':
2025-07-01 05:48:26.441 atags += '-' * la
2025-07-01 05:48:26.446 elif tag == 'insert':
2025-07-01 05:48:26.453 btags += '+' * lb
2025-07-01 05:48:26.462 elif tag == 'equal':
2025-07-01 05:48:26.468 atags += ' ' * la
2025-07-01 05:48:26.474 btags += ' ' * lb
2025-07-01 05:48:26.479 else:
2025-07-01 05:48:26.486 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:26.495 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:26.502 else:
2025-07-01 05:48:26.510 # the synch pair is identical
2025-07-01 05:48:26.517 yield ' ' + aelt
2025-07-01 05:48:26.523
2025-07-01 05:48:26.529 # pump out diffs from after the synch point
2025-07-01 05:48:26.542 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:26.551
2025-07-01 05:48:26.558 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:26.570 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:26.581
2025-07-01 05:48:26.589 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:26.597 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:26.604 alo = 300, ahi = 1101
2025-07-01 05:48:26.614 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:26.621 blo = 300, bhi = 1101
2025-07-01 05:48:26.627
2025-07-01 05:48:26.632 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:26.637 g = []
2025-07-01 05:48:26.644 if alo < ahi:
2025-07-01 05:48:26.650 if blo < bhi:
2025-07-01 05:48:26.656 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:26.663 else:
2025-07-01 05:48:26.669 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:26.675 elif blo < bhi:
2025-07-01 05:48:26.681 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:26.686
2025-07-01 05:48:26.696 > yield from g
2025-07-01 05:48:26.701
2025-07-01 05:48:26.706 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:26.711 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:26.717
2025-07-01 05:48:26.723 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:26.737 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:26.748 alo = 300, ahi = 1101
2025-07-01 05:48:26.760 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:26.771 blo = 300, bhi = 1101
2025-07-01 05:48:26.783
2025-07-01 05:48:26.796 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:26.809 r"""
2025-07-01 05:48:26.820 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:26.829 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:26.836 synch point, and intraline difference marking is done on the
2025-07-01 05:48:26.843 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:26.849
2025-07-01 05:48:26.855 Example:
2025-07-01 05:48:26.860
2025-07-01 05:48:26.866 >>> d = Differ()
2025-07-01 05:48:26.876 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:26.884 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:26.895 >>> print(''.join(results), end="")
2025-07-01 05:48:26.905 - abcDefghiJkl
2025-07-01 05:48:26.925 + abcdefGhijkl
2025-07-01 05:48:26.949 """
2025-07-01 05:48:26.957
2025-07-01 05:48:26.965 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:26.976 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:26.984 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:26.992 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:26.999 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:27.006
2025-07-01 05:48:27.018 # search for the pair that matches best without being identical
2025-07-01 05:48:27.027 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:27.034 # on junk -- unless we have to)
2025-07-01 05:48:27.045 for j in range(blo, bhi):
2025-07-01 05:48:27.055 bj = b[j]
2025-07-01 05:48:27.063 cruncher.set_seq2(bj)
2025-07-01 05:48:27.071 for i in range(alo, ahi):
2025-07-01 05:48:27.082 ai = a[i]
2025-07-01 05:48:27.091 if ai == bj:
2025-07-01 05:48:27.098 if eqi is None:
2025-07-01 05:48:27.106 eqi, eqj = i, j
2025-07-01 05:48:27.117 continue
2025-07-01 05:48:27.127 cruncher.set_seq1(ai)
2025-07-01 05:48:27.135 # computing similarity is expensive, so use the quick
2025-07-01 05:48:27.143 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:27.149 # compares by a factor of 3.
2025-07-01 05:48:27.156 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:27.162 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:27.168 # of the computation is cached by cruncher
2025-07-01 05:48:27.174 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:27.184 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:27.193 cruncher.ratio() > best_ratio:
2025-07-01 05:48:27.206 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:27.218 if best_ratio < cutoff:
2025-07-01 05:48:27.228 # no non-identical "pretty close" pair
2025-07-01 05:48:27.239 if eqi is None:
2025-07-01 05:48:27.249 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:27.259 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:27.271 return
2025-07-01 05:48:27.280 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:27.288 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:27.294 else:
2025-07-01 05:48:27.305 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:27.315 eqi = None
2025-07-01 05:48:27.323
2025-07-01 05:48:27.331 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:27.343 # identical
2025-07-01 05:48:27.351
2025-07-01 05:48:27.358 # pump out diffs from before the synch point
2025-07-01 05:48:27.369 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:27.375
2025-07-01 05:48:27.382 # do intraline marking on the synch pair
2025-07-01 05:48:27.393 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:27.404 if eqi is None:
2025-07-01 05:48:27.413 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:27.421 atags = btags = ""
2025-07-01 05:48:27.428 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:27.434 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:27.440 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:27.445 if tag == 'replace':
2025-07-01 05:48:27.451 atags += '^' * la
2025-07-01 05:48:27.458 btags += '^' * lb
2025-07-01 05:48:27.463 elif tag == 'delete':
2025-07-01 05:48:27.469 atags += '-' * la
2025-07-01 05:48:27.475 elif tag == 'insert':
2025-07-01 05:48:27.480 btags += '+' * lb
2025-07-01 05:48:27.486 elif tag == 'equal':
2025-07-01 05:48:27.491 atags += ' ' * la
2025-07-01 05:48:27.499 btags += ' ' * lb
2025-07-01 05:48:27.509 else:
2025-07-01 05:48:27.517 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:27.524 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:27.530 else:
2025-07-01 05:48:27.536 # the synch pair is identical
2025-07-01 05:48:27.542 yield ' ' + aelt
2025-07-01 05:48:27.548
2025-07-01 05:48:27.554 # pump out diffs from after the synch point
2025-07-01 05:48:27.564 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:27.573
2025-07-01 05:48:27.580 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:27.586 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:27.591
2025-07-01 05:48:27.599 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:27.609 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:27.617 alo = 301, ahi = 1101
2025-07-01 05:48:27.626 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:27.634 blo = 301, bhi = 1101
2025-07-01 05:48:27.641
2025-07-01 05:48:27.647 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:27.655 g = []
2025-07-01 05:48:27.667 if alo < ahi:
2025-07-01 05:48:27.677 if blo < bhi:
2025-07-01 05:48:27.685 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:27.691 else:
2025-07-01 05:48:27.698 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:27.703 elif blo < bhi:
2025-07-01 05:48:27.708 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:27.714
2025-07-01 05:48:27.719 > yield from g
2025-07-01 05:48:27.725
2025-07-01 05:48:27.731 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:27.738 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:27.743
2025-07-01 05:48:27.749 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:27.754 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:27.760 alo = 301, ahi = 1101
2025-07-01 05:48:27.766 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:27.772 blo = 301, bhi = 1101
2025-07-01 05:48:27.779
2025-07-01 05:48:27.789 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:27.796 r"""
2025-07-01 05:48:27.802 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:27.807 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:27.813 synch point, and intraline difference marking is done on the
2025-07-01 05:48:27.819 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:27.825
2025-07-01 05:48:27.831 Example:
2025-07-01 05:48:27.838
2025-07-01 05:48:27.844 >>> d = Differ()
2025-07-01 05:48:27.851 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:27.858 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:27.864 >>> print(''.join(results), end="")
2025-07-01 05:48:27.871 - abcDefghiJkl
2025-07-01 05:48:27.884 + abcdefGhijkl
2025-07-01 05:48:27.897 """
2025-07-01 05:48:27.903
2025-07-01 05:48:27.910 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:27.915 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:27.920 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:27.926 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:27.931 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:27.937
2025-07-01 05:48:27.943 # search for the pair that matches best without being identical
2025-07-01 05:48:27.950 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:27.961 # on junk -- unless we have to)
2025-07-01 05:48:27.969 for j in range(blo, bhi):
2025-07-01 05:48:27.977 bj = b[j]
2025-07-01 05:48:27.984 cruncher.set_seq2(bj)
2025-07-01 05:48:27.997 for i in range(alo, ahi):
2025-07-01 05:48:28.008 ai = a[i]
2025-07-01 05:48:28.016 if ai == bj:
2025-07-01 05:48:28.023 if eqi is None:
2025-07-01 05:48:28.030 eqi, eqj = i, j
2025-07-01 05:48:28.042 continue
2025-07-01 05:48:28.052 cruncher.set_seq1(ai)
2025-07-01 05:48:28.064 # computing similarity is expensive, so use the quick
2025-07-01 05:48:28.073 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:28.080 # compares by a factor of 3.
2025-07-01 05:48:28.086 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:28.091 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:28.096 # of the computation is cached by cruncher
2025-07-01 05:48:28.105 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:28.111 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:28.116 cruncher.ratio() > best_ratio:
2025-07-01 05:48:28.127 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:28.134 if best_ratio < cutoff:
2025-07-01 05:48:28.141 # no non-identical "pretty close" pair
2025-07-01 05:48:28.147 if eqi is None:
2025-07-01 05:48:28.152 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:28.159 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:28.166 return
2025-07-01 05:48:28.174 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:28.182 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:28.188 else:
2025-07-01 05:48:28.195 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:28.200 eqi = None
2025-07-01 05:48:28.206
2025-07-01 05:48:28.217 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:28.228 # identical
2025-07-01 05:48:28.239
2025-07-01 05:48:28.249 # pump out diffs from before the synch point
2025-07-01 05:48:28.262 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:28.270
2025-07-01 05:48:28.279 # do intraline marking on the synch pair
2025-07-01 05:48:28.289 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:28.300 if eqi is None:
2025-07-01 05:48:28.313 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:28.323 atags = btags = ""
2025-07-01 05:48:28.331 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:28.339 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:28.348 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:28.359 if tag == 'replace':
2025-07-01 05:48:28.369 atags += '^' * la
2025-07-01 05:48:28.378 btags += '^' * lb
2025-07-01 05:48:28.390 elif tag == 'delete':
2025-07-01 05:48:28.402 atags += '-' * la
2025-07-01 05:48:28.413 elif tag == 'insert':
2025-07-01 05:48:28.425 btags += '+' * lb
2025-07-01 05:48:28.436 elif tag == 'equal':
2025-07-01 05:48:28.447 atags += ' ' * la
2025-07-01 05:48:28.457 btags += ' ' * lb
2025-07-01 05:48:28.465 else:
2025-07-01 05:48:28.477 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:28.488 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:28.499 else:
2025-07-01 05:48:28.508 # the synch pair is identical
2025-07-01 05:48:28.517 yield ' ' + aelt
2025-07-01 05:48:28.528
2025-07-01 05:48:28.539 # pump out diffs from after the synch point
2025-07-01 05:48:28.551 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:28.563
2025-07-01 05:48:28.576 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:28.587 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:28.596
2025-07-01 05:48:28.603 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:28.610 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:28.623 alo = 302, ahi = 1101
2025-07-01 05:48:28.636 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:28.643 blo = 302, bhi = 1101
2025-07-01 05:48:28.650
2025-07-01 05:48:28.656 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:28.664 g = []
2025-07-01 05:48:28.671 if alo < ahi:
2025-07-01 05:48:28.677 if blo < bhi:
2025-07-01 05:48:28.684 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:28.691 else:
2025-07-01 05:48:28.699 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:28.711 elif blo < bhi:
2025-07-01 05:48:28.718 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:28.726
2025-07-01 05:48:28.732 > yield from g
2025-07-01 05:48:28.739
2025-07-01 05:48:28.748 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:28.759 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:28.767
2025-07-01 05:48:28.774 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:28.780 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:28.787 alo = 302, ahi = 1101
2025-07-01 05:48:28.795 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:28.801 blo = 302, bhi = 1101
2025-07-01 05:48:28.806
2025-07-01 05:48:28.811 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:28.815 r"""
2025-07-01 05:48:28.820 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:28.825 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:28.829 synch point, and intraline difference marking is done on the
2025-07-01 05:48:28.835 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:28.840
2025-07-01 05:48:28.846 Example:
2025-07-01 05:48:28.851
2025-07-01 05:48:28.857 >>> d = Differ()
2025-07-01 05:48:28.862 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:28.868 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:28.874 >>> print(''.join(results), end="")
2025-07-01 05:48:28.883 - abcDefghiJkl
2025-07-01 05:48:28.900 + abcdefGhijkl
2025-07-01 05:48:28.916 """
2025-07-01 05:48:28.926
2025-07-01 05:48:28.935 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:28.946 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:28.954 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:28.963 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:28.974 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:28.982
2025-07-01 05:48:28.994 # search for the pair that matches best without being identical
2025-07-01 05:48:29.006 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:29.021 # on junk -- unless we have to)
2025-07-01 05:48:29.033 for j in range(blo, bhi):
2025-07-01 05:48:29.043 bj = b[j]
2025-07-01 05:48:29.053 cruncher.set_seq2(bj)
2025-07-01 05:48:29.064 for i in range(alo, ahi):
2025-07-01 05:48:29.074 ai = a[i]
2025-07-01 05:48:29.082 if ai == bj:
2025-07-01 05:48:29.089 if eqi is None:
2025-07-01 05:48:29.095 eqi, eqj = i, j
2025-07-01 05:48:29.100 continue
2025-07-01 05:48:29.105 cruncher.set_seq1(ai)
2025-07-01 05:48:29.110 # computing similarity is expensive, so use the quick
2025-07-01 05:48:29.114 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:29.119 # compares by a factor of 3.
2025-07-01 05:48:29.124 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:29.132 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:29.146 # of the computation is cached by cruncher
2025-07-01 05:48:29.156 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:29.163 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:29.175 cruncher.ratio() > best_ratio:
2025-07-01 05:48:29.185 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:29.197 if best_ratio < cutoff:
2025-07-01 05:48:29.207 # no non-identical "pretty close" pair
2025-07-01 05:48:29.216 if eqi is None:
2025-07-01 05:48:29.223 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:29.232 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:29.244 return
2025-07-01 05:48:29.257 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:29.269 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:29.280 else:
2025-07-01 05:48:29.291 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:29.304 eqi = None
2025-07-01 05:48:29.317
2025-07-01 05:48:29.331 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:29.343 # identical
2025-07-01 05:48:29.352
2025-07-01 05:48:29.361 # pump out diffs from before the synch point
2025-07-01 05:48:29.368 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:29.377
2025-07-01 05:48:29.383 # do intraline marking on the synch pair
2025-07-01 05:48:29.388 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:29.394 if eqi is None:
2025-07-01 05:48:29.400 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:29.407 atags = btags = ""
2025-07-01 05:48:29.414 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:29.422 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:29.429 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:29.435 if tag == 'replace':
2025-07-01 05:48:29.441 atags += '^' * la
2025-07-01 05:48:29.447 btags += '^' * lb
2025-07-01 05:48:29.452 elif tag == 'delete':
2025-07-01 05:48:29.458 atags += '-' * la
2025-07-01 05:48:29.468 elif tag == 'insert':
2025-07-01 05:48:29.479 btags += '+' * lb
2025-07-01 05:48:29.487 elif tag == 'equal':
2025-07-01 05:48:29.495 atags += ' ' * la
2025-07-01 05:48:29.505 btags += ' ' * lb
2025-07-01 05:48:29.515 else:
2025-07-01 05:48:29.525 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:29.538 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:29.548 else:
2025-07-01 05:48:29.556 # the synch pair is identical
2025-07-01 05:48:29.564 yield ' ' + aelt
2025-07-01 05:48:29.570
2025-07-01 05:48:29.580 # pump out diffs from after the synch point
2025-07-01 05:48:29.589 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:29.596
2025-07-01 05:48:29.602 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:29.614 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:29.624
2025-07-01 05:48:29.632 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:29.640 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:29.647 alo = 303, ahi = 1101
2025-07-01 05:48:29.657 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:29.669 blo = 303, bhi = 1101
2025-07-01 05:48:29.679
2025-07-01 05:48:29.687 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:29.698 g = []
2025-07-01 05:48:29.707 if alo < ahi:
2025-07-01 05:48:29.715 if blo < bhi:
2025-07-01 05:48:29.723 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:29.730 else:
2025-07-01 05:48:29.741 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:29.750 elif blo < bhi:
2025-07-01 05:48:29.756 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:29.763
2025-07-01 05:48:29.769 > yield from g
2025-07-01 05:48:29.776
2025-07-01 05:48:29.782 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:29.790 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:29.799
2025-07-01 05:48:29.806 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:29.814 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:29.821 alo = 303, ahi = 1101
2025-07-01 05:48:29.828 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:29.834 blo = 303, bhi = 1101
2025-07-01 05:48:29.839
2025-07-01 05:48:29.845 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:29.851 r"""
2025-07-01 05:48:29.858 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:29.865 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:29.872 synch point, and intraline difference marking is done on the
2025-07-01 05:48:29.879 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:29.887
2025-07-01 05:48:29.897 Example:
2025-07-01 05:48:29.905
2025-07-01 05:48:29.912 >>> d = Differ()
2025-07-01 05:48:29.917 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:29.922 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:29.927 >>> print(''.join(results), end="")
2025-07-01 05:48:29.931 - abcDefghiJkl
2025-07-01 05:48:29.940 + abcdefGhijkl
2025-07-01 05:48:29.949 """
2025-07-01 05:48:29.954
2025-07-01 05:48:29.959 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:29.965 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:29.970 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:29.977 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:29.984 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:29.990
2025-07-01 05:48:29.996 # search for the pair that matches best without being identical
2025-07-01 05:48:30.000 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:30.006 # on junk -- unless we have to)
2025-07-01 05:48:30.012 for j in range(blo, bhi):
2025-07-01 05:48:30.019 bj = b[j]
2025-07-01 05:48:30.027 cruncher.set_seq2(bj)
2025-07-01 05:48:30.033 for i in range(alo, ahi):
2025-07-01 05:48:30.038 ai = a[i]
2025-07-01 05:48:30.046 if ai == bj:
2025-07-01 05:48:30.057 if eqi is None:
2025-07-01 05:48:30.066 eqi, eqj = i, j
2025-07-01 05:48:30.076 continue
2025-07-01 05:48:30.084 cruncher.set_seq1(ai)
2025-07-01 05:48:30.092 # computing similarity is expensive, so use the quick
2025-07-01 05:48:30.099 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:30.107 # compares by a factor of 3.
2025-07-01 05:48:30.119 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:30.127 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:30.134 # of the computation is cached by cruncher
2025-07-01 05:48:30.142 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:30.149 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:30.156 cruncher.ratio() > best_ratio:
2025-07-01 05:48:30.162 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:30.171 if best_ratio < cutoff:
2025-07-01 05:48:30.179 # no non-identical "pretty close" pair
2025-07-01 05:48:30.192 if eqi is None:
2025-07-01 05:48:30.205 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:30.219 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:30.228 return
2025-07-01 05:48:30.236 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:30.243 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:30.255 else:
2025-07-01 05:48:30.268 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:30.280 eqi = None
2025-07-01 05:48:30.288
2025-07-01 05:48:30.296 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:30.305 # identical
2025-07-01 05:48:30.312
2025-07-01 05:48:30.318 # pump out diffs from before the synch point
2025-07-01 05:48:30.325 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:30.331
2025-07-01 05:48:30.337 # do intraline marking on the synch pair
2025-07-01 05:48:30.346 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:30.357 if eqi is None:
2025-07-01 05:48:30.366 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:30.375 atags = btags = ""
2025-07-01 05:48:30.388 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:30.399 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:30.407 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:30.414 if tag == 'replace':
2025-07-01 05:48:30.421 atags += '^' * la
2025-07-01 05:48:30.427 btags += '^' * lb
2025-07-01 05:48:30.432 elif tag == 'delete':
2025-07-01 05:48:30.438 atags += '-' * la
2025-07-01 05:48:30.450 elif tag == 'insert':
2025-07-01 05:48:30.461 btags += '+' * lb
2025-07-01 05:48:30.472 elif tag == 'equal':
2025-07-01 05:48:30.482 atags += ' ' * la
2025-07-01 05:48:30.489 btags += ' ' * lb
2025-07-01 05:48:30.495 else:
2025-07-01 05:48:30.502 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:30.509 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:30.514 else:
2025-07-01 05:48:30.519 # the synch pair is identical
2025-07-01 05:48:30.525 yield ' ' + aelt
2025-07-01 05:48:30.534
2025-07-01 05:48:30.547 # pump out diffs from after the synch point
2025-07-01 05:48:30.554 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:30.563
2025-07-01 05:48:30.575 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:30.585 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:30.593
2025-07-01 05:48:30.605 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:30.615 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:30.622 alo = 304, ahi = 1101
2025-07-01 05:48:30.629 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:30.634 blo = 304, bhi = 1101
2025-07-01 05:48:30.640
2025-07-01 05:48:30.646 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:30.651 g = []
2025-07-01 05:48:30.656 if alo < ahi:
2025-07-01 05:48:30.662 if blo < bhi:
2025-07-01 05:48:30.668 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:30.675 else:
2025-07-01 05:48:30.687 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:30.697 elif blo < bhi:
2025-07-01 05:48:30.705 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:30.712
2025-07-01 05:48:30.718 > yield from g
2025-07-01 05:48:30.729
2025-07-01 05:48:30.738 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:30.745 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:30.754
2025-07-01 05:48:30.760 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:30.766 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:30.770 alo = 304, ahi = 1101
2025-07-01 05:48:30.776 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:30.781 blo = 304, bhi = 1101
2025-07-01 05:48:30.787
2025-07-01 05:48:30.795 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:30.806 r"""
2025-07-01 05:48:30.816 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:30.823 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:30.833 synch point, and intraline difference marking is done on the
2025-07-01 05:48:30.841 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:30.848
2025-07-01 05:48:30.854 Example:
2025-07-01 05:48:30.866
2025-07-01 05:48:30.873 >>> d = Differ()
2025-07-01 05:48:30.881 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:30.892 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:30.902 >>> print(''.join(results), end="")
2025-07-01 05:48:30.911 - abcDefghiJkl
2025-07-01 05:48:30.932 + abcdefGhijkl
2025-07-01 05:48:30.952 """
2025-07-01 05:48:30.962
2025-07-01 05:48:30.971 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:30.979 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:30.985 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:30.991 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:30.997 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:31.003
2025-07-01 05:48:31.009 # search for the pair that matches best without being identical
2025-07-01 05:48:31.015 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:31.025 # on junk -- unless we have to)
2025-07-01 05:48:31.035 for j in range(blo, bhi):
2025-07-01 05:48:31.047 bj = b[j]
2025-07-01 05:48:31.059 cruncher.set_seq2(bj)
2025-07-01 05:48:31.069 for i in range(alo, ahi):
2025-07-01 05:48:31.076 ai = a[i]
2025-07-01 05:48:31.082 if ai == bj:
2025-07-01 05:48:31.088 if eqi is None:
2025-07-01 05:48:31.095 eqi, eqj = i, j
2025-07-01 05:48:31.101 continue
2025-07-01 05:48:31.109 cruncher.set_seq1(ai)
2025-07-01 05:48:31.116 # computing similarity is expensive, so use the quick
2025-07-01 05:48:31.123 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:31.131 # compares by a factor of 3.
2025-07-01 05:48:31.142 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:31.150 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:31.156 # of the computation is cached by cruncher
2025-07-01 05:48:31.162 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:31.166 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:31.177 cruncher.ratio() > best_ratio:
2025-07-01 05:48:31.186 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:31.193 if best_ratio < cutoff:
2025-07-01 05:48:31.199 # no non-identical "pretty close" pair
2025-07-01 05:48:31.205 if eqi is None:
2025-07-01 05:48:31.211 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:31.218 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:31.224 return
2025-07-01 05:48:31.230 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:31.236 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:31.242 else:
2025-07-01 05:48:31.248 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:31.254 eqi = None
2025-07-01 05:48:31.260
2025-07-01 05:48:31.267 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:31.275 # identical
2025-07-01 05:48:31.286
2025-07-01 05:48:31.292 # pump out diffs from before the synch point
2025-07-01 05:48:31.299 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:31.307
2025-07-01 05:48:31.312 # do intraline marking on the synch pair
2025-07-01 05:48:31.318 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:31.324 if eqi is None:
2025-07-01 05:48:31.331 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:31.338 atags = btags = ""
2025-07-01 05:48:31.345 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:31.353 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:31.358 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:31.364 if tag == 'replace':
2025-07-01 05:48:31.370 atags += '^' * la
2025-07-01 05:48:31.378 btags += '^' * lb
2025-07-01 05:48:31.384 elif tag == 'delete':
2025-07-01 05:48:31.390 atags += '-' * la
2025-07-01 05:48:31.396 elif tag == 'insert':
2025-07-01 05:48:31.402 btags += '+' * lb
2025-07-01 05:48:31.407 elif tag == 'equal':
2025-07-01 05:48:31.413 atags += ' ' * la
2025-07-01 05:48:31.419 btags += ' ' * lb
2025-07-01 05:48:31.425 else:
2025-07-01 05:48:31.431 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:31.438 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:31.444 else:
2025-07-01 05:48:31.450 # the synch pair is identical
2025-07-01 05:48:31.456 yield ' ' + aelt
2025-07-01 05:48:31.462
2025-07-01 05:48:31.471 # pump out diffs from after the synch point
2025-07-01 05:48:31.478 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:31.487
2025-07-01 05:48:31.498 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:31.507 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:31.515
2025-07-01 05:48:31.527 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:31.536 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:31.543 alo = 305, ahi = 1101
2025-07-01 05:48:31.551 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:31.559 blo = 305, bhi = 1101
2025-07-01 05:48:31.572
2025-07-01 05:48:31.582 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:31.589 g = []
2025-07-01 05:48:31.595 if alo < ahi:
2025-07-01 05:48:31.602 if blo < bhi:
2025-07-01 05:48:31.608 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:31.616 else:
2025-07-01 05:48:31.622 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:31.629 elif blo < bhi:
2025-07-01 05:48:31.634 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:31.640
2025-07-01 05:48:31.646 > yield from g
2025-07-01 05:48:31.654
2025-07-01 05:48:31.663 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:31.670 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:31.681
2025-07-01 05:48:31.691 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:31.699 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:31.706 alo = 305, ahi = 1101
2025-07-01 05:48:31.712 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:31.723 blo = 305, bhi = 1101
2025-07-01 05:48:31.731
2025-07-01 05:48:31.738 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:31.745 r"""
2025-07-01 05:48:31.751 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:31.759 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:31.769 synch point, and intraline difference marking is done on the
2025-07-01 05:48:31.777 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:31.784
2025-07-01 05:48:31.791 Example:
2025-07-01 05:48:31.799
2025-07-01 05:48:31.808 >>> d = Differ()
2025-07-01 05:48:31.816 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:31.822 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:31.830 >>> print(''.join(results), end="")
2025-07-01 05:48:31.836 - abcDefghiJkl
2025-07-01 05:48:31.847 + abcdefGhijkl
2025-07-01 05:48:31.858 """
2025-07-01 05:48:31.868
2025-07-01 05:48:31.878 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:31.890 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:31.898 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:31.909 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:31.918 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:31.926
2025-07-01 05:48:31.939 # search for the pair that matches best without being identical
2025-07-01 05:48:31.948 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:31.957 # on junk -- unless we have to)
2025-07-01 05:48:31.964 for j in range(blo, bhi):
2025-07-01 05:48:31.970 bj = b[j]
2025-07-01 05:48:31.976 cruncher.set_seq2(bj)
2025-07-01 05:48:31.981 for i in range(alo, ahi):
2025-07-01 05:48:31.985 ai = a[i]
2025-07-01 05:48:31.990 if ai == bj:
2025-07-01 05:48:31.995 if eqi is None:
2025-07-01 05:48:32.000 eqi, eqj = i, j
2025-07-01 05:48:32.005 continue
2025-07-01 05:48:32.012 cruncher.set_seq1(ai)
2025-07-01 05:48:32.022 # computing similarity is expensive, so use the quick
2025-07-01 05:48:32.034 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:32.044 # compares by a factor of 3.
2025-07-01 05:48:32.054 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:32.061 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:32.068 # of the computation is cached by cruncher
2025-07-01 05:48:32.074 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:32.085 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:32.097 cruncher.ratio() > best_ratio:
2025-07-01 05:48:32.107 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:32.112 if best_ratio < cutoff:
2025-07-01 05:48:32.118 # no non-identical "pretty close" pair
2025-07-01 05:48:32.123 if eqi is None:
2025-07-01 05:48:32.128 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:32.132 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:32.137 return
2025-07-01 05:48:32.141 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:32.146 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:32.151 else:
2025-07-01 05:48:32.157 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:32.163 eqi = None
2025-07-01 05:48:32.171
2025-07-01 05:48:32.182 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:32.191 # identical
2025-07-01 05:48:32.203
2025-07-01 05:48:32.213 # pump out diffs from before the synch point
2025-07-01 05:48:32.225 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:32.238
2025-07-01 05:48:32.251 # do intraline marking on the synch pair
2025-07-01 05:48:32.261 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:32.274 if eqi is None:
2025-07-01 05:48:32.285 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:32.298 atags = btags = ""
2025-07-01 05:48:32.311 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:32.322 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:32.333 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:32.344 if tag == 'replace':
2025-07-01 05:48:32.352 atags += '^' * la
2025-07-01 05:48:32.359 btags += '^' * lb
2025-07-01 05:48:32.367 elif tag == 'delete':
2025-07-01 05:48:32.375 atags += '-' * la
2025-07-01 05:48:32.388 elif tag == 'insert':
2025-07-01 05:48:32.400 btags += '+' * lb
2025-07-01 05:48:32.415 elif tag == 'equal':
2025-07-01 05:48:32.425 atags += ' ' * la
2025-07-01 05:48:32.432 btags += ' ' * lb
2025-07-01 05:48:32.438 else:
2025-07-01 05:48:32.447 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:32.457 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:32.466 else:
2025-07-01 05:48:32.471 # the synch pair is identical
2025-07-01 05:48:32.477 yield ' ' + aelt
2025-07-01 05:48:32.483
2025-07-01 05:48:32.488 # pump out diffs from after the synch point
2025-07-01 05:48:32.492 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:32.499
2025-07-01 05:48:32.507 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:32.512 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:32.518
2025-07-01 05:48:32.527 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:32.535 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:32.541 alo = 306, ahi = 1101
2025-07-01 05:48:32.552 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:32.564 blo = 306, bhi = 1101
2025-07-01 05:48:32.577
2025-07-01 05:48:32.589 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:32.599 g = []
2025-07-01 05:48:32.607 if alo < ahi:
2025-07-01 05:48:32.613 if blo < bhi:
2025-07-01 05:48:32.619 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:32.624 else:
2025-07-01 05:48:32.630 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:32.636 elif blo < bhi:
2025-07-01 05:48:32.642 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:32.650
2025-07-01 05:48:32.659 > yield from g
2025-07-01 05:48:32.666
2025-07-01 05:48:32.672 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:32.678 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:32.684
2025-07-01 05:48:32.691 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:32.700 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:32.707 alo = 306, ahi = 1101
2025-07-01 05:48:32.721 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:32.732 blo = 306, bhi = 1101
2025-07-01 05:48:32.742
2025-07-01 05:48:32.750 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:32.756 r"""
2025-07-01 05:48:32.761 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:32.767 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:32.773 synch point, and intraline difference marking is done on the
2025-07-01 05:48:32.779 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:32.785
2025-07-01 05:48:32.791 Example:
2025-07-01 05:48:32.799
2025-07-01 05:48:32.810 >>> d = Differ()
2025-07-01 05:48:32.818 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:32.824 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:32.830 >>> print(''.join(results), end="")
2025-07-01 05:48:32.835 - abcDefghiJkl
2025-07-01 05:48:32.846 + abcdefGhijkl
2025-07-01 05:48:32.861 """
2025-07-01 05:48:32.866
2025-07-01 05:48:32.872 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:32.878 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:32.883 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:32.889 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:32.897 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:32.906
2025-07-01 05:48:32.913 # search for the pair that matches best without being identical
2025-07-01 05:48:32.925 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:32.935 # on junk -- unless we have to)
2025-07-01 05:48:32.943 for j in range(blo, bhi):
2025-07-01 05:48:32.949 bj = b[j]
2025-07-01 05:48:32.955 cruncher.set_seq2(bj)
2025-07-01 05:48:32.960 for i in range(alo, ahi):
2025-07-01 05:48:32.964 ai = a[i]
2025-07-01 05:48:32.968 if ai == bj:
2025-07-01 05:48:32.973 if eqi is None:
2025-07-01 05:48:32.977 eqi, eqj = i, j
2025-07-01 05:48:32.988 continue
2025-07-01 05:48:32.996 cruncher.set_seq1(ai)
2025-07-01 05:48:33.004 # computing similarity is expensive, so use the quick
2025-07-01 05:48:33.012 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:33.021 # compares by a factor of 3.
2025-07-01 05:48:33.033 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:33.041 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:33.047 # of the computation is cached by cruncher
2025-07-01 05:48:33.052 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:33.056 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:33.062 cruncher.ratio() > best_ratio:
2025-07-01 05:48:33.068 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:33.074 if best_ratio < cutoff:
2025-07-01 05:48:33.079 # no non-identical "pretty close" pair
2025-07-01 05:48:33.084 if eqi is None:
2025-07-01 05:48:33.088 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:33.093 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:33.097 return
2025-07-01 05:48:33.102 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:33.106 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:33.110 else:
2025-07-01 05:48:33.115 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:33.119 eqi = None
2025-07-01 05:48:33.124
2025-07-01 05:48:33.130 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:33.136 # identical
2025-07-01 05:48:33.141
2025-07-01 05:48:33.145 # pump out diffs from before the synch point
2025-07-01 05:48:33.150 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:33.155
2025-07-01 05:48:33.160 # do intraline marking on the synch pair
2025-07-01 05:48:33.165 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:33.171 if eqi is None:
2025-07-01 05:48:33.176 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:33.181 atags = btags = ""
2025-07-01 05:48:33.190 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:33.196 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:33.202 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:33.207 if tag == 'replace':
2025-07-01 05:48:33.213 atags += '^' * la
2025-07-01 05:48:33.223 btags += '^' * lb
2025-07-01 05:48:33.231 elif tag == 'delete':
2025-07-01 05:48:33.239 atags += '-' * la
2025-07-01 05:48:33.247 elif tag == 'insert':
2025-07-01 05:48:33.254 btags += '+' * lb
2025-07-01 05:48:33.260 elif tag == 'equal':
2025-07-01 05:48:33.266 atags += ' ' * la
2025-07-01 05:48:33.272 btags += ' ' * lb
2025-07-01 05:48:33.278 else:
2025-07-01 05:48:33.283 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:33.296 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:33.307 else:
2025-07-01 05:48:33.318 # the synch pair is identical
2025-07-01 05:48:33.331 yield ' ' + aelt
2025-07-01 05:48:33.342
2025-07-01 05:48:33.356 # pump out diffs from after the synch point
2025-07-01 05:48:33.369 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:33.379
2025-07-01 05:48:33.386 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:33.395 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:33.406
2025-07-01 05:48:33.414 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:33.422 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:33.433 alo = 307, ahi = 1101
2025-07-01 05:48:33.441 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:33.447 blo = 307, bhi = 1101
2025-07-01 05:48:33.452
2025-07-01 05:48:33.463 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:33.470 g = []
2025-07-01 05:48:33.478 if alo < ahi:
2025-07-01 05:48:33.487 if blo < bhi:
2025-07-01 05:48:33.496 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:33.503 else:
2025-07-01 05:48:33.511 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:33.518 elif blo < bhi:
2025-07-01 05:48:33.529 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:33.538
2025-07-01 05:48:33.547 > yield from g
2025-07-01 05:48:33.554
2025-07-01 05:48:33.561 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:33.567 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:33.573
2025-07-01 05:48:33.585 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:33.598 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:33.608 alo = 307, ahi = 1101
2025-07-01 05:48:33.617 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:33.627 blo = 307, bhi = 1101
2025-07-01 05:48:33.637
2025-07-01 05:48:33.647 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:33.659 r"""
2025-07-01 05:48:33.671 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:33.683 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:33.696 synch point, and intraline difference marking is done on the
2025-07-01 05:48:33.705 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:33.712
2025-07-01 05:48:33.719 Example:
2025-07-01 05:48:33.727
2025-07-01 05:48:33.739 >>> d = Differ()
2025-07-01 05:48:33.751 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:33.761 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:33.769 >>> print(''.join(results), end="")
2025-07-01 05:48:33.779 - abcDefghiJkl
2025-07-01 05:48:33.801 + abcdefGhijkl
2025-07-01 05:48:33.821 """
2025-07-01 05:48:33.828
2025-07-01 05:48:33.834 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:33.841 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:33.846 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:33.857 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:33.862 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:33.866
2025-07-01 05:48:33.872 # search for the pair that matches best without being identical
2025-07-01 05:48:33.877 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:33.881 # on junk -- unless we have to)
2025-07-01 05:48:33.886 for j in range(blo, bhi):
2025-07-01 05:48:33.890 bj = b[j]
2025-07-01 05:48:33.895 cruncher.set_seq2(bj)
2025-07-01 05:48:33.899 for i in range(alo, ahi):
2025-07-01 05:48:33.904 ai = a[i]
2025-07-01 05:48:33.908 if ai == bj:
2025-07-01 05:48:33.913 if eqi is None:
2025-07-01 05:48:33.918 eqi, eqj = i, j
2025-07-01 05:48:33.924 continue
2025-07-01 05:48:33.930 cruncher.set_seq1(ai)
2025-07-01 05:48:33.935 # computing similarity is expensive, so use the quick
2025-07-01 05:48:33.941 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:33.947 # compares by a factor of 3.
2025-07-01 05:48:33.953 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:33.959 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:33.964 # of the computation is cached by cruncher
2025-07-01 05:48:33.970 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:33.981 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:33.990 cruncher.ratio() > best_ratio:
2025-07-01 05:48:33.997 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:34.004 if best_ratio < cutoff:
2025-07-01 05:48:34.010 # no non-identical "pretty close" pair
2025-07-01 05:48:34.022 if eqi is None:
2025-07-01 05:48:34.033 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:34.042 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:34.051 return
2025-07-01 05:48:34.060 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:34.069 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:34.075 else:
2025-07-01 05:48:34.082 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:34.090 eqi = None
2025-07-01 05:48:34.097
2025-07-01 05:48:34.103 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:34.108 # identical
2025-07-01 05:48:34.113
2025-07-01 05:48:34.118 # pump out diffs from before the synch point
2025-07-01 05:48:34.123 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:34.128
2025-07-01 05:48:34.132 # do intraline marking on the synch pair
2025-07-01 05:48:34.137 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:34.141 if eqi is None:
2025-07-01 05:48:34.149 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:34.161 atags = btags = ""
2025-07-01 05:48:34.171 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:34.179 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:34.186 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:34.193 if tag == 'replace':
2025-07-01 05:48:34.200 atags += '^' * la
2025-07-01 05:48:34.206 btags += '^' * lb
2025-07-01 05:48:34.217 elif tag == 'delete':
2025-07-01 05:48:34.227 atags += '-' * la
2025-07-01 05:48:34.235 elif tag == 'insert':
2025-07-01 05:48:34.242 btags += '+' * lb
2025-07-01 05:48:34.250 elif tag == 'equal':
2025-07-01 05:48:34.261 atags += ' ' * la
2025-07-01 05:48:34.270 btags += ' ' * lb
2025-07-01 05:48:34.278 else:
2025-07-01 05:48:34.286 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:34.293 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:34.299 else:
2025-07-01 05:48:34.304 # the synch pair is identical
2025-07-01 05:48:34.310 yield ' ' + aelt
2025-07-01 05:48:34.315
2025-07-01 05:48:34.323 # pump out diffs from after the synch point
2025-07-01 05:48:34.333 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:34.341
2025-07-01 05:48:34.348 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:34.353 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:34.359
2025-07-01 05:48:34.366 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:34.377 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:34.386 alo = 308, ahi = 1101
2025-07-01 05:48:34.395 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:34.403 blo = 308, bhi = 1101
2025-07-01 05:48:34.409
2025-07-01 05:48:34.415 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:34.421 g = []
2025-07-01 05:48:34.426 if alo < ahi:
2025-07-01 05:48:34.431 if blo < bhi:
2025-07-01 05:48:34.439 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:34.446 else:
2025-07-01 05:48:34.452 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:34.459 elif blo < bhi:
2025-07-01 05:48:34.469 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:34.478
2025-07-01 05:48:34.483 > yield from g
2025-07-01 05:48:34.489
2025-07-01 05:48:34.498 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:34.510 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:34.518
2025-07-01 05:48:34.526 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:34.535 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:34.542 alo = 308, ahi = 1101
2025-07-01 05:48:34.549 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:34.555 blo = 308, bhi = 1101
2025-07-01 05:48:34.560
2025-07-01 05:48:34.566 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:34.571 r"""
2025-07-01 05:48:34.576 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:34.581 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:34.587 synch point, and intraline difference marking is done on the
2025-07-01 05:48:34.591 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:34.596
2025-07-01 05:48:34.606 Example:
2025-07-01 05:48:34.614
2025-07-01 05:48:34.624 >>> d = Differ()
2025-07-01 05:48:34.634 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:34.640 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:34.648 >>> print(''.join(results), end="")
2025-07-01 05:48:34.655 - abcDefghiJkl
2025-07-01 05:48:34.674 + abcdefGhijkl
2025-07-01 05:48:34.689 """
2025-07-01 05:48:34.695
2025-07-01 05:48:34.700 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:34.706 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:34.712 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:34.718 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:34.726 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:34.734
2025-07-01 05:48:34.741 # search for the pair that matches best without being identical
2025-07-01 05:48:34.746 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:34.752 # on junk -- unless we have to)
2025-07-01 05:48:34.759 for j in range(blo, bhi):
2025-07-01 05:48:34.765 bj = b[j]
2025-07-01 05:48:34.772 cruncher.set_seq2(bj)
2025-07-01 05:48:34.780 for i in range(alo, ahi):
2025-07-01 05:48:34.787 ai = a[i]
2025-07-01 05:48:34.794 if ai == bj:
2025-07-01 05:48:34.803 if eqi is None:
2025-07-01 05:48:34.815 eqi, eqj = i, j
2025-07-01 05:48:34.827 continue
2025-07-01 05:48:34.836 cruncher.set_seq1(ai)
2025-07-01 05:48:34.843 # computing similarity is expensive, so use the quick
2025-07-01 05:48:34.849 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:34.856 # compares by a factor of 3.
2025-07-01 05:48:34.862 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:34.867 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:34.871 # of the computation is cached by cruncher
2025-07-01 05:48:34.876 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:34.881 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:34.885 cruncher.ratio() > best_ratio:
2025-07-01 05:48:34.893 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:34.899 if best_ratio < cutoff:
2025-07-01 05:48:34.907 # no non-identical "pretty close" pair
2025-07-01 05:48:34.918 if eqi is None:
2025-07-01 05:48:34.926 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:34.936 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:34.945 return
2025-07-01 05:48:34.957 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:34.967 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:34.975 else:
2025-07-01 05:48:34.982 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:34.993 eqi = None
2025-07-01 05:48:35.002
2025-07-01 05:48:35.009 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:35.016 # identical
2025-07-01 05:48:35.023
2025-07-01 05:48:35.031 # pump out diffs from before the synch point
2025-07-01 05:48:35.042 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:35.051
2025-07-01 05:48:35.061 # do intraline marking on the synch pair
2025-07-01 05:48:35.072 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:35.081 if eqi is None:
2025-07-01 05:48:35.089 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:35.095 atags = btags = ""
2025-07-01 05:48:35.101 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:35.108 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:35.114 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:35.119 if tag == 'replace':
2025-07-01 05:48:35.127 atags += '^' * la
2025-07-01 05:48:35.138 btags += '^' * lb
2025-07-01 05:48:35.146 elif tag == 'delete':
2025-07-01 05:48:35.157 atags += '-' * la
2025-07-01 05:48:35.168 elif tag == 'insert':
2025-07-01 05:48:35.180 btags += '+' * lb
2025-07-01 05:48:35.190 elif tag == 'equal':
2025-07-01 05:48:35.198 atags += ' ' * la
2025-07-01 05:48:35.204 btags += ' ' * lb
2025-07-01 05:48:35.210 else:
2025-07-01 05:48:35.215 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:35.221 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:35.226 else:
2025-07-01 05:48:35.232 # the synch pair is identical
2025-07-01 05:48:35.237 yield ' ' + aelt
2025-07-01 05:48:35.245
2025-07-01 05:48:35.255 # pump out diffs from after the synch point
2025-07-01 05:48:35.263 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:35.271
2025-07-01 05:48:35.277 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:35.283 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:35.290
2025-07-01 05:48:35.296 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:35.302 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:35.308 alo = 309, ahi = 1101
2025-07-01 05:48:35.314 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:35.320 blo = 309, bhi = 1101
2025-07-01 05:48:35.326
2025-07-01 05:48:35.331 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:35.337 g = []
2025-07-01 05:48:35.343 if alo < ahi:
2025-07-01 05:48:35.350 if blo < bhi:
2025-07-01 05:48:35.357 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:35.363 else:
2025-07-01 05:48:35.371 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:35.379 elif blo < bhi:
2025-07-01 05:48:35.390 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:35.401
2025-07-01 05:48:35.409 > yield from g
2025-07-01 05:48:35.415
2025-07-01 05:48:35.420 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:35.426 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:35.430
2025-07-01 05:48:35.435 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:35.441 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:35.447 alo = 309, ahi = 1101
2025-07-01 05:48:35.452 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:35.458 blo = 309, bhi = 1101
2025-07-01 05:48:35.464
2025-07-01 05:48:35.470 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:35.479 r"""
2025-07-01 05:48:35.488 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:35.495 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:35.501 synch point, and intraline difference marking is done on the
2025-07-01 05:48:35.506 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:35.511
2025-07-01 05:48:35.515 Example:
2025-07-01 05:48:35.520
2025-07-01 05:48:35.525 >>> d = Differ()
2025-07-01 05:48:35.531 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:35.537 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:35.543 >>> print(''.join(results), end="")
2025-07-01 05:48:35.549 - abcDefghiJkl
2025-07-01 05:48:35.562 + abcdefGhijkl
2025-07-01 05:48:35.581 """
2025-07-01 05:48:35.588
2025-07-01 05:48:35.595 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:35.600 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:35.605 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:35.611 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:35.616 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:35.622
2025-07-01 05:48:35.628 # search for the pair that matches best without being identical
2025-07-01 05:48:35.634 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:35.645 # on junk -- unless we have to)
2025-07-01 05:48:35.652 for j in range(blo, bhi):
2025-07-01 05:48:35.658 bj = b[j]
2025-07-01 05:48:35.663 cruncher.set_seq2(bj)
2025-07-01 05:48:35.668 for i in range(alo, ahi):
2025-07-01 05:48:35.673 ai = a[i]
2025-07-01 05:48:35.678 if ai == bj:
2025-07-01 05:48:35.682 if eqi is None:
2025-07-01 05:48:35.687 eqi, eqj = i, j
2025-07-01 05:48:35.692 continue
2025-07-01 05:48:35.698 cruncher.set_seq1(ai)
2025-07-01 05:48:35.704 # computing similarity is expensive, so use the quick
2025-07-01 05:48:35.710 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:35.717 # compares by a factor of 3.
2025-07-01 05:48:35.725 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:35.732 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:35.737 # of the computation is cached by cruncher
2025-07-01 05:48:35.742 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:35.747 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:35.752 cruncher.ratio() > best_ratio:
2025-07-01 05:48:35.758 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:35.763 if best_ratio < cutoff:
2025-07-01 05:48:35.769 # no non-identical "pretty close" pair
2025-07-01 05:48:35.774 if eqi is None:
2025-07-01 05:48:35.783 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:35.795 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:35.802 return
2025-07-01 05:48:35.809 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:35.816 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:35.822 else:
2025-07-01 05:48:35.830 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:35.836 eqi = None
2025-07-01 05:48:35.843
2025-07-01 05:48:35.851 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:35.861 # identical
2025-07-01 05:48:35.867
2025-07-01 05:48:35.874 # pump out diffs from before the synch point
2025-07-01 05:48:35.880 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:35.887
2025-07-01 05:48:35.895 # do intraline marking on the synch pair
2025-07-01 05:48:35.906 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:35.913 if eqi is None:
2025-07-01 05:48:35.920 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:35.926 atags = btags = ""
2025-07-01 05:48:35.931 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:35.937 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:35.942 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:35.948 if tag == 'replace':
2025-07-01 05:48:35.955 atags += '^' * la
2025-07-01 05:48:35.964 btags += '^' * lb
2025-07-01 05:48:35.971 elif tag == 'delete':
2025-07-01 05:48:35.978 atags += '-' * la
2025-07-01 05:48:35.984 elif tag == 'insert':
2025-07-01 05:48:35.989 btags += '+' * lb
2025-07-01 05:48:35.996 elif tag == 'equal':
2025-07-01 05:48:36.002 atags += ' ' * la
2025-07-01 05:48:36.007 btags += ' ' * lb
2025-07-01 05:48:36.011 else:
2025-07-01 05:48:36.016 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:36.021 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:36.026 else:
2025-07-01 05:48:36.031 # the synch pair is identical
2025-07-01 05:48:36.037 yield ' ' + aelt
2025-07-01 05:48:36.042
2025-07-01 05:48:36.051 # pump out diffs from after the synch point
2025-07-01 05:48:36.062 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:36.068
2025-07-01 05:48:36.075 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:36.085 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:36.094
2025-07-01 05:48:36.102 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:36.109 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:36.116 alo = 312, ahi = 1101
2025-07-01 05:48:36.131 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:36.142 blo = 312, bhi = 1101
2025-07-01 05:48:36.151
2025-07-01 05:48:36.157 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:36.162 g = []
2025-07-01 05:48:36.168 if alo < ahi:
2025-07-01 05:48:36.175 if blo < bhi:
2025-07-01 05:48:36.182 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:36.192 else:
2025-07-01 05:48:36.201 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:36.207 elif blo < bhi:
2025-07-01 05:48:36.213 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:36.218
2025-07-01 05:48:36.231 > yield from g
2025-07-01 05:48:36.237
2025-07-01 05:48:36.242 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:36.247 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:36.251
2025-07-01 05:48:36.256 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:36.261 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:36.266 alo = 312, ahi = 1101
2025-07-01 05:48:36.272 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:36.277 blo = 312, bhi = 1101
2025-07-01 05:48:36.287
2025-07-01 05:48:36.294 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:36.302 r"""
2025-07-01 05:48:36.312 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:36.321 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:36.331 synch point, and intraline difference marking is done on the
2025-07-01 05:48:36.339 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:36.346
2025-07-01 05:48:36.353 Example:
2025-07-01 05:48:36.359
2025-07-01 05:48:36.366 >>> d = Differ()
2025-07-01 05:48:36.374 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:36.387 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:36.397 >>> print(''.join(results), end="")
2025-07-01 05:48:36.406 - abcDefghiJkl
2025-07-01 05:48:36.422 + abcdefGhijkl
2025-07-01 05:48:36.442 """
2025-07-01 05:48:36.449
2025-07-01 05:48:36.460 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:36.470 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:36.478 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:36.485 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:36.489 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:36.494
2025-07-01 05:48:36.499 # search for the pair that matches best without being identical
2025-07-01 05:48:36.504 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:36.511 # on junk -- unless we have to)
2025-07-01 05:48:36.518 for j in range(blo, bhi):
2025-07-01 05:48:36.523 bj = b[j]
2025-07-01 05:48:36.528 cruncher.set_seq2(bj)
2025-07-01 05:48:36.533 for i in range(alo, ahi):
2025-07-01 05:48:36.539 ai = a[i]
2025-07-01 05:48:36.544 if ai == bj:
2025-07-01 05:48:36.556 if eqi is None:
2025-07-01 05:48:36.565 eqi, eqj = i, j
2025-07-01 05:48:36.572 continue
2025-07-01 05:48:36.579 cruncher.set_seq1(ai)
2025-07-01 05:48:36.587 # computing similarity is expensive, so use the quick
2025-07-01 05:48:36.599 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:36.610 # compares by a factor of 3.
2025-07-01 05:48:36.621 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:36.629 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:36.640 # of the computation is cached by cruncher
2025-07-01 05:48:36.653 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:36.665 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:36.676 cruncher.ratio() > best_ratio:
2025-07-01 05:48:36.688 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:36.697 if best_ratio < cutoff:
2025-07-01 05:48:36.704 # no non-identical "pretty close" pair
2025-07-01 05:48:36.711 if eqi is None:
2025-07-01 05:48:36.719 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:36.729 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:36.738 return
2025-07-01 05:48:36.745 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:36.750 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:36.756 else:
2025-07-01 05:48:36.762 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:36.768 eqi = None
2025-07-01 05:48:36.774
2025-07-01 05:48:36.785 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:36.795 # identical
2025-07-01 05:48:36.804
2025-07-01 05:48:36.810 # pump out diffs from before the synch point
2025-07-01 05:48:36.817 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:36.823
2025-07-01 05:48:36.828 # do intraline marking on the synch pair
2025-07-01 05:48:36.835 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:36.843 if eqi is None:
2025-07-01 05:48:36.853 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:36.862 atags = btags = ""
2025-07-01 05:48:36.871 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:36.884 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:36.895 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:36.904 if tag == 'replace':
2025-07-01 05:48:36.913 atags += '^' * la
2025-07-01 05:48:36.920 btags += '^' * lb
2025-07-01 05:48:36.926 elif tag == 'delete':
2025-07-01 05:48:36.933 atags += '-' * la
2025-07-01 05:48:36.938 elif tag == 'insert':
2025-07-01 05:48:36.944 btags += '+' * lb
2025-07-01 05:48:36.949 elif tag == 'equal':
2025-07-01 05:48:36.956 atags += ' ' * la
2025-07-01 05:48:36.963 btags += ' ' * lb
2025-07-01 05:48:36.968 else:
2025-07-01 05:48:36.974 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:36.985 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:36.994 else:
2025-07-01 05:48:37.002 # the synch pair is identical
2025-07-01 05:48:37.008 yield ' ' + aelt
2025-07-01 05:48:37.014
2025-07-01 05:48:37.018 # pump out diffs from after the synch point
2025-07-01 05:48:37.024 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:37.030
2025-07-01 05:48:37.039 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:37.051 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:37.059
2025-07-01 05:48:37.067 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:37.075 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:37.084 alo = 313, ahi = 1101
2025-07-01 05:48:37.094 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:37.103 blo = 313, bhi = 1101
2025-07-01 05:48:37.112
2025-07-01 05:48:37.119 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:37.126 g = []
2025-07-01 05:48:37.133 if alo < ahi:
2025-07-01 05:48:37.139 if blo < bhi:
2025-07-01 05:48:37.145 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:37.150 else:
2025-07-01 05:48:37.160 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:37.169 elif blo < bhi:
2025-07-01 05:48:37.176 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:37.183
2025-07-01 05:48:37.194 > yield from g
2025-07-01 05:48:37.202
2025-07-01 05:48:37.208 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:37.214 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:37.219
2025-07-01 05:48:37.223 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:37.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:37.234 alo = 313, ahi = 1101
2025-07-01 05:48:37.240 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:37.245 blo = 313, bhi = 1101
2025-07-01 05:48:37.251
2025-07-01 05:48:37.258 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:37.267 r"""
2025-07-01 05:48:37.277 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:37.286 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:37.297 synch point, and intraline difference marking is done on the
2025-07-01 05:48:37.304 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:37.311
2025-07-01 05:48:37.318 Example:
2025-07-01 05:48:37.328
2025-07-01 05:48:37.339 >>> d = Differ()
2025-07-01 05:48:37.346 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:37.353 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:37.360 >>> print(''.join(results), end="")
2025-07-01 05:48:37.367 - abcDefghiJkl
2025-07-01 05:48:37.385 + abcdefGhijkl
2025-07-01 05:48:37.405 """
2025-07-01 05:48:37.413
2025-07-01 05:48:37.424 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:37.433 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:37.444 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:37.453 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:37.460 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:37.466
2025-07-01 05:48:37.478 # search for the pair that matches best without being identical
2025-07-01 05:48:37.489 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:37.498 # on junk -- unless we have to)
2025-07-01 05:48:37.507 for j in range(blo, bhi):
2025-07-01 05:48:37.514 bj = b[j]
2025-07-01 05:48:37.523 cruncher.set_seq2(bj)
2025-07-01 05:48:37.533 for i in range(alo, ahi):
2025-07-01 05:48:37.541 ai = a[i]
2025-07-01 05:48:37.548 if ai == bj:
2025-07-01 05:48:37.554 if eqi is None:
2025-07-01 05:48:37.561 eqi, eqj = i, j
2025-07-01 05:48:37.567 continue
2025-07-01 05:48:37.573 cruncher.set_seq1(ai)
2025-07-01 05:48:37.579 # computing similarity is expensive, so use the quick
2025-07-01 05:48:37.587 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:37.597 # compares by a factor of 3.
2025-07-01 05:48:37.606 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:37.614 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:37.624 # of the computation is cached by cruncher
2025-07-01 05:48:37.637 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:37.648 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:37.658 cruncher.ratio() > best_ratio:
2025-07-01 05:48:37.669 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:37.679 if best_ratio < cutoff:
2025-07-01 05:48:37.689 # no non-identical "pretty close" pair
2025-07-01 05:48:37.697 if eqi is None:
2025-07-01 05:48:37.706 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:37.714 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:37.720 return
2025-07-01 05:48:37.726 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:37.730 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:37.735 else:
2025-07-01 05:48:37.740 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:37.746 eqi = None
2025-07-01 05:48:37.751
2025-07-01 05:48:37.757 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:37.762 # identical
2025-07-01 05:48:37.767
2025-07-01 05:48:37.773 # pump out diffs from before the synch point
2025-07-01 05:48:37.779 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:37.784
2025-07-01 05:48:37.790 # do intraline marking on the synch pair
2025-07-01 05:48:37.796 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:37.802 if eqi is None:
2025-07-01 05:48:37.813 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:37.824 atags = btags = ""
2025-07-01 05:48:37.835 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:37.846 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:37.855 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:37.869 if tag == 'replace':
2025-07-01 05:48:37.880 atags += '^' * la
2025-07-01 05:48:37.888 btags += '^' * lb
2025-07-01 05:48:37.895 elif tag == 'delete':
2025-07-01 05:48:37.902 atags += '-' * la
2025-07-01 05:48:37.912 elif tag == 'insert':
2025-07-01 05:48:37.921 btags += '+' * lb
2025-07-01 05:48:37.927 elif tag == 'equal':
2025-07-01 05:48:37.941 atags += ' ' * la
2025-07-01 05:48:37.952 btags += ' ' * lb
2025-07-01 05:48:37.963 else:
2025-07-01 05:48:37.972 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:37.980 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:37.987 else:
2025-07-01 05:48:37.995 # the synch pair is identical
2025-07-01 05:48:38.005 yield ' ' + aelt
2025-07-01 05:48:38.014
2025-07-01 05:48:38.026 # pump out diffs from after the synch point
2025-07-01 05:48:38.034 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:38.042
2025-07-01 05:48:38.049 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:38.055 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:38.060
2025-07-01 05:48:38.065 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:38.070 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:38.075 alo = 314, ahi = 1101
2025-07-01 05:48:38.080 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:38.084 blo = 314, bhi = 1101
2025-07-01 05:48:38.089
2025-07-01 05:48:38.096 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:38.102 g = []
2025-07-01 05:48:38.111 if alo < ahi:
2025-07-01 05:48:38.123 if blo < bhi:
2025-07-01 05:48:38.133 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:38.141 else:
2025-07-01 05:48:38.148 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:38.155 elif blo < bhi:
2025-07-01 05:48:38.163 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:38.169
2025-07-01 05:48:38.175 > yield from g
2025-07-01 05:48:38.180
2025-07-01 05:48:38.189 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:38.200 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:38.208
2025-07-01 05:48:38.214 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:38.221 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:38.227 alo = 314, ahi = 1101
2025-07-01 05:48:38.234 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:38.246 blo = 314, bhi = 1101
2025-07-01 05:48:38.255
2025-07-01 05:48:38.262 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:38.270 r"""
2025-07-01 05:48:38.278 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:38.290 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:38.302 synch point, and intraline difference marking is done on the
2025-07-01 05:48:38.314 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:38.324
2025-07-01 05:48:38.333 Example:
2025-07-01 05:48:38.340
2025-07-01 05:48:38.346 >>> d = Differ()
2025-07-01 05:48:38.353 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:38.362 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:38.369 >>> print(''.join(results), end="")
2025-07-01 05:48:38.376 - abcDefghiJkl
2025-07-01 05:48:38.390 + abcdefGhijkl
2025-07-01 05:48:38.402 """
2025-07-01 05:48:38.407
2025-07-01 05:48:38.414 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:38.421 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:38.428 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:38.434 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:38.443 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:38.454
2025-07-01 05:48:38.461 # search for the pair that matches best without being identical
2025-07-01 05:48:38.467 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:38.474 # on junk -- unless we have to)
2025-07-01 05:48:38.485 for j in range(blo, bhi):
2025-07-01 05:48:38.494 bj = b[j]
2025-07-01 05:48:38.501 cruncher.set_seq2(bj)
2025-07-01 05:48:38.512 for i in range(alo, ahi):
2025-07-01 05:48:38.520 ai = a[i]
2025-07-01 05:48:38.529 if ai == bj:
2025-07-01 05:48:38.535 if eqi is None:
2025-07-01 05:48:38.543 eqi, eqj = i, j
2025-07-01 05:48:38.551 continue
2025-07-01 05:48:38.559 cruncher.set_seq1(ai)
2025-07-01 05:48:38.566 # computing similarity is expensive, so use the quick
2025-07-01 05:48:38.573 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:38.579 # compares by a factor of 3.
2025-07-01 05:48:38.587 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:38.595 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:38.603 # of the computation is cached by cruncher
2025-07-01 05:48:38.615 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:38.625 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:38.636 cruncher.ratio() > best_ratio:
2025-07-01 05:48:38.644 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:38.651 if best_ratio < cutoff:
2025-07-01 05:48:38.656 # no non-identical "pretty close" pair
2025-07-01 05:48:38.661 if eqi is None:
2025-07-01 05:48:38.666 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:38.670 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:38.674 return
2025-07-01 05:48:38.679 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:38.683 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:38.694 else:
2025-07-01 05:48:38.701 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:38.707 eqi = None
2025-07-01 05:48:38.712
2025-07-01 05:48:38.717 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:38.722 # identical
2025-07-01 05:48:38.727
2025-07-01 05:48:38.735 # pump out diffs from before the synch point
2025-07-01 05:48:38.744 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:38.752
2025-07-01 05:48:38.761 # do intraline marking on the synch pair
2025-07-01 05:48:38.767 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:38.773 if eqi is None:
2025-07-01 05:48:38.778 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:38.783 atags = btags = ""
2025-07-01 05:48:38.787 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:38.792 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:38.797 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:38.803 if tag == 'replace':
2025-07-01 05:48:38.810 atags += '^' * la
2025-07-01 05:48:38.815 btags += '^' * lb
2025-07-01 05:48:38.820 elif tag == 'delete':
2025-07-01 05:48:38.824 atags += '-' * la
2025-07-01 05:48:38.829 elif tag == 'insert':
2025-07-01 05:48:38.833 btags += '+' * lb
2025-07-01 05:48:38.838 elif tag == 'equal':
2025-07-01 05:48:38.843 atags += ' ' * la
2025-07-01 05:48:38.847 btags += ' ' * lb
2025-07-01 05:48:38.852 else:
2025-07-01 05:48:38.856 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:38.861 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:38.865 else:
2025-07-01 05:48:38.870 # the synch pair is identical
2025-07-01 05:48:38.875 yield ' ' + aelt
2025-07-01 05:48:38.881
2025-07-01 05:48:38.886 # pump out diffs from after the synch point
2025-07-01 05:48:38.893 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:38.900
2025-07-01 05:48:38.907 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:38.913 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:38.918
2025-07-01 05:48:38.923 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:38.929 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:38.935 alo = 315, ahi = 1101
2025-07-01 05:48:38.941 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:38.946 blo = 315, bhi = 1101
2025-07-01 05:48:38.951
2025-07-01 05:48:38.956 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:38.962 g = []
2025-07-01 05:48:38.968 if alo < ahi:
2025-07-01 05:48:38.974 if blo < bhi:
2025-07-01 05:48:38.983 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:38.992 else:
2025-07-01 05:48:38.999 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:39.005 elif blo < bhi:
2025-07-01 05:48:39.010 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:39.015
2025-07-01 05:48:39.019 > yield from g
2025-07-01 05:48:39.024
2025-07-01 05:48:39.028 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:39.033 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:39.038
2025-07-01 05:48:39.044 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:39.053 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:39.058 alo = 315, ahi = 1101
2025-07-01 05:48:39.067 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:39.072 blo = 315, bhi = 1101
2025-07-01 05:48:39.077
2025-07-01 05:48:39.083 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:39.091 r"""
2025-07-01 05:48:39.101 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:39.113 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:39.123 synch point, and intraline difference marking is done on the
2025-07-01 05:48:39.132 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:39.139
2025-07-01 05:48:39.146 Example:
2025-07-01 05:48:39.156
2025-07-01 05:48:39.164 >>> d = Differ()
2025-07-01 05:48:39.174 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:39.184 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:39.193 >>> print(''.join(results), end="")
2025-07-01 05:48:39.201 - abcDefghiJkl
2025-07-01 05:48:39.214 + abcdefGhijkl
2025-07-01 05:48:39.225 """
2025-07-01 05:48:39.230
2025-07-01 05:48:39.243 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:39.253 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:39.262 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:39.271 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:39.282 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:39.291
2025-07-01 05:48:39.302 # search for the pair that matches best without being identical
2025-07-01 05:48:39.316 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:39.326 # on junk -- unless we have to)
2025-07-01 05:48:39.334 for j in range(blo, bhi):
2025-07-01 05:48:39.342 bj = b[j]
2025-07-01 05:48:39.350 cruncher.set_seq2(bj)
2025-07-01 05:48:39.360 for i in range(alo, ahi):
2025-07-01 05:48:39.372 ai = a[i]
2025-07-01 05:48:39.383 if ai == bj:
2025-07-01 05:48:39.394 if eqi is None:
2025-07-01 05:48:39.406 eqi, eqj = i, j
2025-07-01 05:48:39.414 continue
2025-07-01 05:48:39.421 cruncher.set_seq1(ai)
2025-07-01 05:48:39.428 # computing similarity is expensive, so use the quick
2025-07-01 05:48:39.435 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:39.443 # compares by a factor of 3.
2025-07-01 05:48:39.452 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:39.459 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:39.466 # of the computation is cached by cruncher
2025-07-01 05:48:39.472 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:39.478 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:39.485 cruncher.ratio() > best_ratio:
2025-07-01 05:48:39.492 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:39.499 if best_ratio < cutoff:
2025-07-01 05:48:39.506 # no non-identical "pretty close" pair
2025-07-01 05:48:39.518 if eqi is None:
2025-07-01 05:48:39.528 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:39.537 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:39.543 return
2025-07-01 05:48:39.550 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:39.559 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:39.570 else:
2025-07-01 05:48:39.581 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:39.592 eqi = None
2025-07-01 05:48:39.599
2025-07-01 05:48:39.606 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:39.619 # identical
2025-07-01 05:48:39.630
2025-07-01 05:48:39.642 # pump out diffs from before the synch point
2025-07-01 05:48:39.650 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:39.659
2025-07-01 05:48:39.670 # do intraline marking on the synch pair
2025-07-01 05:48:39.680 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:39.688 if eqi is None:
2025-07-01 05:48:39.695 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:39.704 atags = btags = ""
2025-07-01 05:48:39.715 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:39.724 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:39.732 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:39.739 if tag == 'replace':
2025-07-01 05:48:39.754 atags += '^' * la
2025-07-01 05:48:39.763 btags += '^' * lb
2025-07-01 05:48:39.770 elif tag == 'delete':
2025-07-01 05:48:39.782 atags += '-' * la
2025-07-01 05:48:39.792 elif tag == 'insert':
2025-07-01 05:48:39.800 btags += '+' * lb
2025-07-01 05:48:39.807 elif tag == 'equal':
2025-07-01 05:48:39.815 atags += ' ' * la
2025-07-01 05:48:39.821 btags += ' ' * lb
2025-07-01 05:48:39.825 else:
2025-07-01 05:48:39.831 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:39.838 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:39.844 else:
2025-07-01 05:48:39.850 # the synch pair is identical
2025-07-01 05:48:39.855 yield ' ' + aelt
2025-07-01 05:48:39.864
2025-07-01 05:48:39.875 # pump out diffs from after the synch point
2025-07-01 05:48:39.884 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:39.891
2025-07-01 05:48:39.898 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:39.905 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:39.910
2025-07-01 05:48:39.915 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:39.923 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:39.928 alo = 316, ahi = 1101
2025-07-01 05:48:39.936 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:39.943 blo = 316, bhi = 1101
2025-07-01 05:48:39.948
2025-07-01 05:48:39.954 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:39.960 g = []
2025-07-01 05:48:39.966 if alo < ahi:
2025-07-01 05:48:39.972 if blo < bhi:
2025-07-01 05:48:39.978 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:39.988 else:
2025-07-01 05:48:40.000 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:40.011 elif blo < bhi:
2025-07-01 05:48:40.021 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:40.033
2025-07-01 05:48:40.042 > yield from g
2025-07-01 05:48:40.048
2025-07-01 05:48:40.054 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:40.062 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:40.069
2025-07-01 05:48:40.075 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:40.084 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:40.096 alo = 316, ahi = 1101
2025-07-01 05:48:40.107 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:40.116 blo = 316, bhi = 1101
2025-07-01 05:48:40.122
2025-07-01 05:48:40.131 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:40.141 r"""
2025-07-01 05:48:40.149 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:40.157 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:40.164 synch point, and intraline difference marking is done on the
2025-07-01 05:48:40.171 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:40.177
2025-07-01 05:48:40.183 Example:
2025-07-01 05:48:40.188
2025-07-01 05:48:40.193 >>> d = Differ()
2025-07-01 05:48:40.199 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:40.207 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:40.218 >>> print(''.join(results), end="")
2025-07-01 05:48:40.229 - abcDefghiJkl
2025-07-01 05:48:40.253 + abcdefGhijkl
2025-07-01 05:48:40.271 """
2025-07-01 05:48:40.281
2025-07-01 05:48:40.292 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:40.304 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:40.311 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:40.317 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:40.322 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:40.327
2025-07-01 05:48:40.331 # search for the pair that matches best without being identical
2025-07-01 05:48:40.342 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:40.349 # on junk -- unless we have to)
2025-07-01 05:48:40.355 for j in range(blo, bhi):
2025-07-01 05:48:40.361 bj = b[j]
2025-07-01 05:48:40.367 cruncher.set_seq2(bj)
2025-07-01 05:48:40.374 for i in range(alo, ahi):
2025-07-01 05:48:40.380 ai = a[i]
2025-07-01 05:48:40.387 if ai == bj:
2025-07-01 05:48:40.393 if eqi is None:
2025-07-01 05:48:40.400 eqi, eqj = i, j
2025-07-01 05:48:40.406 continue
2025-07-01 05:48:40.416 cruncher.set_seq1(ai)
2025-07-01 05:48:40.426 # computing similarity is expensive, so use the quick
2025-07-01 05:48:40.433 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:40.438 # compares by a factor of 3.
2025-07-01 05:48:40.449 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:40.462 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:40.475 # of the computation is cached by cruncher
2025-07-01 05:48:40.486 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:40.498 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:40.510 cruncher.ratio() > best_ratio:
2025-07-01 05:48:40.519 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:40.526 if best_ratio < cutoff:
2025-07-01 05:48:40.536 # no non-identical "pretty close" pair
2025-07-01 05:48:40.550 if eqi is None:
2025-07-01 05:48:40.558 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:40.565 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:40.573 return
2025-07-01 05:48:40.579 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:40.584 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:40.590 else:
2025-07-01 05:48:40.594 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:40.600 eqi = None
2025-07-01 05:48:40.606
2025-07-01 05:48:40.613 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:40.619 # identical
2025-07-01 05:48:40.625
2025-07-01 05:48:40.630 # pump out diffs from before the synch point
2025-07-01 05:48:40.635 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:40.641
2025-07-01 05:48:40.649 # do intraline marking on the synch pair
2025-07-01 05:48:40.657 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:40.663 if eqi is None:
2025-07-01 05:48:40.671 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:40.682 atags = btags = ""
2025-07-01 05:48:40.691 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:40.698 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:40.707 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:40.718 if tag == 'replace':
2025-07-01 05:48:40.726 atags += '^' * la
2025-07-01 05:48:40.733 btags += '^' * lb
2025-07-01 05:48:40.738 elif tag == 'delete':
2025-07-01 05:48:40.745 atags += '-' * la
2025-07-01 05:48:40.756 elif tag == 'insert':
2025-07-01 05:48:40.765 btags += '+' * lb
2025-07-01 05:48:40.772 elif tag == 'equal':
2025-07-01 05:48:40.779 atags += ' ' * la
2025-07-01 05:48:40.788 btags += ' ' * lb
2025-07-01 05:48:40.799 else:
2025-07-01 05:48:40.810 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:40.819 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:40.829 else:
2025-07-01 05:48:40.838 # the synch pair is identical
2025-07-01 05:48:40.848 yield ' ' + aelt
2025-07-01 05:48:40.856
2025-07-01 05:48:40.863 # pump out diffs from after the synch point
2025-07-01 05:48:40.870 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:40.878
2025-07-01 05:48:40.887 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:40.899 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:40.908
2025-07-01 05:48:40.916 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:40.925 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:40.931 alo = 317, ahi = 1101
2025-07-01 05:48:40.937 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:40.942 blo = 317, bhi = 1101
2025-07-01 05:48:40.952
2025-07-01 05:48:40.961 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:40.969 g = []
2025-07-01 05:48:40.975 if alo < ahi:
2025-07-01 05:48:40.982 if blo < bhi:
2025-07-01 05:48:40.988 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:40.994 else:
2025-07-01 05:48:41.000 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:41.005 elif blo < bhi:
2025-07-01 05:48:41.010 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:41.014
2025-07-01 05:48:41.019 > yield from g
2025-07-01 05:48:41.024
2025-07-01 05:48:41.029 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:41.034 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:41.038
2025-07-01 05:48:41.043 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:41.050 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:41.060 alo = 317, ahi = 1101
2025-07-01 05:48:41.072 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:41.083 blo = 317, bhi = 1101
2025-07-01 05:48:41.092
2025-07-01 05:48:41.104 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:41.115 r"""
2025-07-01 05:48:41.126 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:41.135 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:41.144 synch point, and intraline difference marking is done on the
2025-07-01 05:48:41.153 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:41.160
2025-07-01 05:48:41.169 Example:
2025-07-01 05:48:41.174
2025-07-01 05:48:41.180 >>> d = Differ()
2025-07-01 05:48:41.184 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:41.189 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:41.195 >>> print(''.join(results), end="")
2025-07-01 05:48:41.204 - abcDefghiJkl
2025-07-01 05:48:41.217 + abcdefGhijkl
2025-07-01 05:48:41.231 """
2025-07-01 05:48:41.241
2025-07-01 05:48:41.249 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:41.258 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:41.270 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:41.281 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:41.291 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:41.302
2025-07-01 05:48:41.313 # search for the pair that matches best without being identical
2025-07-01 05:48:41.326 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:41.335 # on junk -- unless we have to)
2025-07-01 05:48:41.349 for j in range(blo, bhi):
2025-07-01 05:48:41.359 bj = b[j]
2025-07-01 05:48:41.368 cruncher.set_seq2(bj)
2025-07-01 05:48:41.374 for i in range(alo, ahi):
2025-07-01 05:48:41.383 ai = a[i]
2025-07-01 05:48:41.394 if ai == bj:
2025-07-01 05:48:41.407 if eqi is None:
2025-07-01 05:48:41.418 eqi, eqj = i, j
2025-07-01 05:48:41.427 continue
2025-07-01 05:48:41.436 cruncher.set_seq1(ai)
2025-07-01 05:48:41.443 # computing similarity is expensive, so use the quick
2025-07-01 05:48:41.450 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:41.457 # compares by a factor of 3.
2025-07-01 05:48:41.463 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:41.472 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:41.481 # of the computation is cached by cruncher
2025-07-01 05:48:41.491 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:41.506 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:41.516 cruncher.ratio() > best_ratio:
2025-07-01 05:48:41.524 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:41.531 if best_ratio < cutoff:
2025-07-01 05:48:41.539 # no non-identical "pretty close" pair
2025-07-01 05:48:41.550 if eqi is None:
2025-07-01 05:48:41.558 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:41.567 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:41.575 return
2025-07-01 05:48:41.583 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:41.590 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:41.602 else:
2025-07-01 05:48:41.614 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:41.626 eqi = None
2025-07-01 05:48:41.635
2025-07-01 05:48:41.643 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:41.651 # identical
2025-07-01 05:48:41.662
2025-07-01 05:48:41.672 # pump out diffs from before the synch point
2025-07-01 05:48:41.680 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:41.686
2025-07-01 05:48:41.692 # do intraline marking on the synch pair
2025-07-01 05:48:41.698 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:41.703 if eqi is None:
2025-07-01 05:48:41.709 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:41.716 atags = btags = ""
2025-07-01 05:48:41.721 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:41.727 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:41.733 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:41.738 if tag == 'replace':
2025-07-01 05:48:41.748 atags += '^' * la
2025-07-01 05:48:41.757 btags += '^' * lb
2025-07-01 05:48:41.767 elif tag == 'delete':
2025-07-01 05:48:41.780 atags += '-' * la
2025-07-01 05:48:41.789 elif tag == 'insert':
2025-07-01 05:48:41.797 btags += '+' * lb
2025-07-01 05:48:41.804 elif tag == 'equal':
2025-07-01 05:48:41.810 atags += ' ' * la
2025-07-01 05:48:41.820 btags += ' ' * lb
2025-07-01 05:48:41.830 else:
2025-07-01 05:48:41.837 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:41.844 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:41.850 else:
2025-07-01 05:48:41.860 # the synch pair is identical
2025-07-01 05:48:41.869 yield ' ' + aelt
2025-07-01 05:48:41.876
2025-07-01 05:48:41.886 # pump out diffs from after the synch point
2025-07-01 05:48:41.896 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:41.904
2025-07-01 05:48:41.911 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:41.918 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:41.929
2025-07-01 05:48:41.937 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:41.945 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:41.951 alo = 318, ahi = 1101
2025-07-01 05:48:41.957 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:41.962 blo = 318, bhi = 1101
2025-07-01 05:48:41.967
2025-07-01 05:48:41.973 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:41.978 g = []
2025-07-01 05:48:41.988 if alo < ahi:
2025-07-01 05:48:41.998 if blo < bhi:
2025-07-01 05:48:42.006 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:42.019 else:
2025-07-01 05:48:42.029 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:42.037 elif blo < bhi:
2025-07-01 05:48:42.042 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:42.048
2025-07-01 05:48:42.054 > yield from g
2025-07-01 05:48:42.064
2025-07-01 05:48:42.072 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:42.080 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:42.088
2025-07-01 05:48:42.098 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:42.105 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:42.112 alo = 318, ahi = 1101
2025-07-01 05:48:42.118 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:42.122 blo = 318, bhi = 1101
2025-07-01 05:48:42.134
2025-07-01 05:48:42.142 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:42.149 r"""
2025-07-01 05:48:42.158 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:42.169 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:42.179 synch point, and intraline difference marking is done on the
2025-07-01 05:48:42.189 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:42.199
2025-07-01 05:48:42.207 Example:
2025-07-01 05:48:42.212
2025-07-01 05:48:42.217 >>> d = Differ()
2025-07-01 05:48:42.221 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:42.225 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:42.230 >>> print(''.join(results), end="")
2025-07-01 05:48:42.234 - abcDefghiJkl
2025-07-01 05:48:42.243 + abcdefGhijkl
2025-07-01 05:48:42.255 """
2025-07-01 05:48:42.263
2025-07-01 05:48:42.275 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:42.284 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:42.292 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:42.299 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:42.308 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:42.316
2025-07-01 05:48:42.323 # search for the pair that matches best without being identical
2025-07-01 05:48:42.332 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:42.343 # on junk -- unless we have to)
2025-07-01 05:48:42.355 for j in range(blo, bhi):
2025-07-01 05:48:42.367 bj = b[j]
2025-07-01 05:48:42.376 cruncher.set_seq2(bj)
2025-07-01 05:48:42.386 for i in range(alo, ahi):
2025-07-01 05:48:42.395 ai = a[i]
2025-07-01 05:48:42.401 if ai == bj:
2025-07-01 05:48:42.407 if eqi is None:
2025-07-01 05:48:42.417 eqi, eqj = i, j
2025-07-01 05:48:42.426 continue
2025-07-01 05:48:42.432 cruncher.set_seq1(ai)
2025-07-01 05:48:42.442 # computing similarity is expensive, so use the quick
2025-07-01 05:48:42.448 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:42.454 # compares by a factor of 3.
2025-07-01 05:48:42.460 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:42.466 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:42.472 # of the computation is cached by cruncher
2025-07-01 05:48:42.478 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:42.484 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:42.490 cruncher.ratio() > best_ratio:
2025-07-01 05:48:42.496 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:42.502 if best_ratio < cutoff:
2025-07-01 05:48:42.512 # no non-identical "pretty close" pair
2025-07-01 05:48:42.522 if eqi is None:
2025-07-01 05:48:42.527 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:42.533 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:42.539 return
2025-07-01 05:48:42.548 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:42.557 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:42.567 else:
2025-07-01 05:48:42.578 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:42.586 eqi = None
2025-07-01 05:48:42.592
2025-07-01 05:48:42.600 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:42.607 # identical
2025-07-01 05:48:42.616
2025-07-01 05:48:42.627 # pump out diffs from before the synch point
2025-07-01 05:48:42.638 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:42.650
2025-07-01 05:48:42.657 # do intraline marking on the synch pair
2025-07-01 05:48:42.664 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:42.671 if eqi is None:
2025-07-01 05:48:42.677 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:42.682 atags = btags = ""
2025-07-01 05:48:42.688 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:42.694 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:42.703 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:42.710 if tag == 'replace':
2025-07-01 05:48:42.715 atags += '^' * la
2025-07-01 05:48:42.722 btags += '^' * lb
2025-07-01 05:48:42.729 elif tag == 'delete':
2025-07-01 05:48:42.735 atags += '-' * la
2025-07-01 05:48:42.743 elif tag == 'insert':
2025-07-01 05:48:42.754 btags += '+' * lb
2025-07-01 05:48:42.761 elif tag == 'equal':
2025-07-01 05:48:42.767 atags += ' ' * la
2025-07-01 05:48:42.773 btags += ' ' * lb
2025-07-01 05:48:42.780 else:
2025-07-01 05:48:42.791 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:42.802 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:42.809 else:
2025-07-01 05:48:42.815 # the synch pair is identical
2025-07-01 05:48:42.821 yield ' ' + aelt
2025-07-01 05:48:42.827
2025-07-01 05:48:42.834 # pump out diffs from after the synch point
2025-07-01 05:48:42.845 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:42.853
2025-07-01 05:48:42.859 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:42.865 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:42.869
2025-07-01 05:48:42.874 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:42.879 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:42.884 alo = 319, ahi = 1101
2025-07-01 05:48:42.889 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:42.900 blo = 319, bhi = 1101
2025-07-01 05:48:42.910
2025-07-01 05:48:42.917 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:42.923 g = []
2025-07-01 05:48:42.928 if alo < ahi:
2025-07-01 05:48:42.935 if blo < bhi:
2025-07-01 05:48:42.943 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:42.954 else:
2025-07-01 05:48:42.962 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:42.970 elif blo < bhi:
2025-07-01 05:48:42.976 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:42.981
2025-07-01 05:48:42.987 > yield from g
2025-07-01 05:48:42.993
2025-07-01 05:48:42.999 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:43.005 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:43.015
2025-07-01 05:48:43.023 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:43.036 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:43.044 alo = 319, ahi = 1101
2025-07-01 05:48:43.051 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:43.058 blo = 319, bhi = 1101
2025-07-01 05:48:43.064
2025-07-01 05:48:43.071 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:43.079 r"""
2025-07-01 05:48:43.090 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:43.098 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:43.104 synch point, and intraline difference marking is done on the
2025-07-01 05:48:43.110 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:43.117
2025-07-01 05:48:43.122 Example:
2025-07-01 05:48:43.129
2025-07-01 05:48:43.135 >>> d = Differ()
2025-07-01 05:48:43.142 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:43.150 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:43.161 >>> print(''.join(results), end="")
2025-07-01 05:48:43.169 - abcDefghiJkl
2025-07-01 05:48:43.188 + abcdefGhijkl
2025-07-01 05:48:43.210 """
2025-07-01 05:48:43.220
2025-07-01 05:48:43.228 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:43.234 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:43.246 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:43.256 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:43.264 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:43.270
2025-07-01 05:48:43.279 # search for the pair that matches best without being identical
2025-07-01 05:48:43.285 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:43.294 # on junk -- unless we have to)
2025-07-01 05:48:43.307 for j in range(blo, bhi):
2025-07-01 05:48:43.317 bj = b[j]
2025-07-01 05:48:43.330 cruncher.set_seq2(bj)
2025-07-01 05:48:43.342 for i in range(alo, ahi):
2025-07-01 05:48:43.354 ai = a[i]
2025-07-01 05:48:43.365 if ai == bj:
2025-07-01 05:48:43.375 if eqi is None:
2025-07-01 05:48:43.388 eqi, eqj = i, j
2025-07-01 05:48:43.400 continue
2025-07-01 05:48:43.410 cruncher.set_seq1(ai)
2025-07-01 05:48:43.418 # computing similarity is expensive, so use the quick
2025-07-01 05:48:43.430 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:43.438 # compares by a factor of 3.
2025-07-01 05:48:43.445 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:43.455 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:43.470 # of the computation is cached by cruncher
2025-07-01 05:48:43.479 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:43.486 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:43.492 cruncher.ratio() > best_ratio:
2025-07-01 05:48:43.498 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:43.502 if best_ratio < cutoff:
2025-07-01 05:48:43.507 # no non-identical "pretty close" pair
2025-07-01 05:48:43.512 if eqi is None:
2025-07-01 05:48:43.516 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:43.522 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:43.530 return
2025-07-01 05:48:43.537 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:43.543 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:43.550 else:
2025-07-01 05:48:43.558 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:43.569 eqi = None
2025-07-01 05:48:43.578
2025-07-01 05:48:43.586 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:43.592 # identical
2025-07-01 05:48:43.598
2025-07-01 05:48:43.604 # pump out diffs from before the synch point
2025-07-01 05:48:43.610 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:43.618
2025-07-01 05:48:43.626 # do intraline marking on the synch pair
2025-07-01 05:48:43.634 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:43.643 if eqi is None:
2025-07-01 05:48:43.651 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:43.659 atags = btags = ""
2025-07-01 05:48:43.666 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:43.671 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:43.677 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:43.682 if tag == 'replace':
2025-07-01 05:48:43.687 atags += '^' * la
2025-07-01 05:48:43.698 btags += '^' * lb
2025-07-01 05:48:43.707 elif tag == 'delete':
2025-07-01 05:48:43.716 atags += '-' * la
2025-07-01 05:48:43.725 elif tag == 'insert':
2025-07-01 05:48:43.731 btags += '+' * lb
2025-07-01 05:48:43.737 elif tag == 'equal':
2025-07-01 05:48:43.743 atags += ' ' * la
2025-07-01 05:48:43.749 btags += ' ' * lb
2025-07-01 05:48:43.754 else:
2025-07-01 05:48:43.761 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:43.769 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:43.782 else:
2025-07-01 05:48:43.789 # the synch pair is identical
2025-07-01 05:48:43.794 yield ' ' + aelt
2025-07-01 05:48:43.800
2025-07-01 05:48:43.806 # pump out diffs from after the synch point
2025-07-01 05:48:43.811 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:43.816
2025-07-01 05:48:43.821 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:43.827 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:43.835
2025-07-01 05:48:43.842 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:43.854 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:43.862 alo = 320, ahi = 1101
2025-07-01 05:48:43.871 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:43.883 blo = 320, bhi = 1101
2025-07-01 05:48:43.894
2025-07-01 05:48:43.902 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:43.908 g = []
2025-07-01 05:48:43.913 if alo < ahi:
2025-07-01 05:48:43.918 if blo < bhi:
2025-07-01 05:48:43.924 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:43.929 else:
2025-07-01 05:48:43.934 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:43.938 elif blo < bhi:
2025-07-01 05:48:43.943 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:43.947
2025-07-01 05:48:43.951 > yield from g
2025-07-01 05:48:43.956
2025-07-01 05:48:43.960 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:43.965 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:43.969
2025-07-01 05:48:43.973 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:43.982 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:43.993 alo = 320, ahi = 1101
2025-07-01 05:48:44.004 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:44.011 blo = 320, bhi = 1101
2025-07-01 05:48:44.017
2025-07-01 05:48:44.023 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:44.030 r"""
2025-07-01 05:48:44.041 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:44.052 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:44.062 synch point, and intraline difference marking is done on the
2025-07-01 05:48:44.073 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:44.080
2025-07-01 05:48:44.086 Example:
2025-07-01 05:48:44.096
2025-07-01 05:48:44.107 >>> d = Differ()
2025-07-01 05:48:44.117 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:44.129 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:44.138 >>> print(''.join(results), end="")
2025-07-01 05:48:44.145 - abcDefghiJkl
2025-07-01 05:48:44.155 + abcdefGhijkl
2025-07-01 05:48:44.171 """
2025-07-01 05:48:44.180
2025-07-01 05:48:44.192 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:44.201 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:44.211 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:44.220 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:44.228 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:44.239
2025-07-01 05:48:44.251 # search for the pair that matches best without being identical
2025-07-01 05:48:44.260 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:44.267 # on junk -- unless we have to)
2025-07-01 05:48:44.274 for j in range(blo, bhi):
2025-07-01 05:48:44.280 bj = b[j]
2025-07-01 05:48:44.286 cruncher.set_seq2(bj)
2025-07-01 05:48:44.293 for i in range(alo, ahi):
2025-07-01 05:48:44.298 ai = a[i]
2025-07-01 05:48:44.309 if ai == bj:
2025-07-01 05:48:44.318 if eqi is None:
2025-07-01 05:48:44.328 eqi, eqj = i, j
2025-07-01 05:48:44.337 continue
2025-07-01 05:48:44.350 cruncher.set_seq1(ai)
2025-07-01 05:48:44.362 # computing similarity is expensive, so use the quick
2025-07-01 05:48:44.375 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:44.384 # compares by a factor of 3.
2025-07-01 05:48:44.392 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:44.399 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:44.408 # of the computation is cached by cruncher
2025-07-01 05:48:44.421 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:44.432 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:44.442 cruncher.ratio() > best_ratio:
2025-07-01 05:48:44.454 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:44.460 if best_ratio < cutoff:
2025-07-01 05:48:44.465 # no non-identical "pretty close" pair
2025-07-01 05:48:44.470 if eqi is None:
2025-07-01 05:48:44.479 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:44.485 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:44.490 return
2025-07-01 05:48:44.496 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:44.501 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:44.505 else:
2025-07-01 05:48:44.514 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:44.521 eqi = None
2025-07-01 05:48:44.527
2025-07-01 05:48:44.533 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:44.539 # identical
2025-07-01 05:48:44.547
2025-07-01 05:48:44.557 # pump out diffs from before the synch point
2025-07-01 05:48:44.569 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:44.581
2025-07-01 05:48:44.590 # do intraline marking on the synch pair
2025-07-01 05:48:44.602 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:44.613 if eqi is None:
2025-07-01 05:48:44.623 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:44.631 atags = btags = ""
2025-07-01 05:48:44.639 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:44.647 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:44.655 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:44.667 if tag == 'replace':
2025-07-01 05:48:44.676 atags += '^' * la
2025-07-01 05:48:44.684 btags += '^' * lb
2025-07-01 05:48:44.690 elif tag == 'delete':
2025-07-01 05:48:44.697 atags += '-' * la
2025-07-01 05:48:44.702 elif tag == 'insert':
2025-07-01 05:48:44.712 btags += '+' * lb
2025-07-01 05:48:44.723 elif tag == 'equal':
2025-07-01 05:48:44.735 atags += ' ' * la
2025-07-01 05:48:44.746 btags += ' ' * lb
2025-07-01 05:48:44.754 else:
2025-07-01 05:48:44.768 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:44.776 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:44.787 else:
2025-07-01 05:48:44.797 # the synch pair is identical
2025-07-01 05:48:44.805 yield ' ' + aelt
2025-07-01 05:48:44.812
2025-07-01 05:48:44.818 # pump out diffs from after the synch point
2025-07-01 05:48:44.824 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:44.830
2025-07-01 05:48:44.835 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:44.843 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:44.854
2025-07-01 05:48:44.863 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:44.872 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:44.879 alo = 321, ahi = 1101
2025-07-01 05:48:44.886 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:44.896 blo = 321, bhi = 1101
2025-07-01 05:48:44.905
2025-07-01 05:48:44.912 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:44.921 g = []
2025-07-01 05:48:44.931 if alo < ahi:
2025-07-01 05:48:44.940 if blo < bhi:
2025-07-01 05:48:44.947 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:44.953 else:
2025-07-01 05:48:44.957 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:44.962 elif blo < bhi:
2025-07-01 05:48:44.967 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:44.971
2025-07-01 05:48:44.976 > yield from g
2025-07-01 05:48:44.987
2025-07-01 05:48:44.993 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:45.006 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:45.015
2025-07-01 05:48:45.026 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:45.035 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:45.043 alo = 321, ahi = 1101
2025-07-01 05:48:45.056 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:45.065 blo = 321, bhi = 1101
2025-07-01 05:48:45.073
2025-07-01 05:48:45.082 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:45.094 r"""
2025-07-01 05:48:45.101 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:45.108 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:45.114 synch point, and intraline difference marking is done on the
2025-07-01 05:48:45.121 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:45.128
2025-07-01 05:48:45.134 Example:
2025-07-01 05:48:45.141
2025-07-01 05:48:45.147 >>> d = Differ()
2025-07-01 05:48:45.156 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:45.168 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:45.177 >>> print(''.join(results), end="")
2025-07-01 05:48:45.185 - abcDefghiJkl
2025-07-01 05:48:45.201 + abcdefGhijkl
2025-07-01 05:48:45.222 """
2025-07-01 05:48:45.230
2025-07-01 05:48:45.238 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:45.251 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:45.262 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:45.273 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:45.283 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:45.291
2025-07-01 05:48:45.298 # search for the pair that matches best without being identical
2025-07-01 05:48:45.310 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:45.318 # on junk -- unless we have to)
2025-07-01 05:48:45.327 for j in range(blo, bhi):
2025-07-01 05:48:45.336 bj = b[j]
2025-07-01 05:48:45.342 cruncher.set_seq2(bj)
2025-07-01 05:48:45.349 for i in range(alo, ahi):
2025-07-01 05:48:45.358 ai = a[i]
2025-07-01 05:48:45.370 if ai == bj:
2025-07-01 05:48:45.378 if eqi is None:
2025-07-01 05:48:45.389 eqi, eqj = i, j
2025-07-01 05:48:45.400 continue
2025-07-01 05:48:45.411 cruncher.set_seq1(ai)
2025-07-01 05:48:45.417 # computing similarity is expensive, so use the quick
2025-07-01 05:48:45.423 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:45.430 # compares by a factor of 3.
2025-07-01 05:48:45.437 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:45.450 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:45.460 # of the computation is cached by cruncher
2025-07-01 05:48:45.469 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:45.479 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:45.489 cruncher.ratio() > best_ratio:
2025-07-01 05:48:45.496 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:45.503 if best_ratio < cutoff:
2025-07-01 05:48:45.511 # no non-identical "pretty close" pair
2025-07-01 05:48:45.519 if eqi is None:
2025-07-01 05:48:45.530 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:45.538 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:45.545 return
2025-07-01 05:48:45.552 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:45.559 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:45.569 else:
2025-07-01 05:48:45.579 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:45.589 eqi = None
2025-07-01 05:48:45.597
2025-07-01 05:48:45.603 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:45.615 # identical
2025-07-01 05:48:45.624
2025-07-01 05:48:45.631 # pump out diffs from before the synch point
2025-07-01 05:48:45.643 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:45.654
2025-07-01 05:48:45.662 # do intraline marking on the synch pair
2025-07-01 05:48:45.668 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:45.674 if eqi is None:
2025-07-01 05:48:45.680 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:45.686 atags = btags = ""
2025-07-01 05:48:45.693 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:45.702 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:45.709 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:45.716 if tag == 'replace':
2025-07-01 05:48:45.722 atags += '^' * la
2025-07-01 05:48:45.730 btags += '^' * lb
2025-07-01 05:48:45.739 elif tag == 'delete':
2025-07-01 05:48:45.746 atags += '-' * la
2025-07-01 05:48:45.754 elif tag == 'insert':
2025-07-01 05:48:45.764 btags += '+' * lb
2025-07-01 05:48:45.777 elif tag == 'equal':
2025-07-01 05:48:45.786 atags += ' ' * la
2025-07-01 05:48:45.793 btags += ' ' * lb
2025-07-01 05:48:45.799 else:
2025-07-01 05:48:45.804 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:45.809 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:45.815 else:
2025-07-01 05:48:45.820 # the synch pair is identical
2025-07-01 05:48:45.824 yield ' ' + aelt
2025-07-01 05:48:45.829
2025-07-01 05:48:45.834 # pump out diffs from after the synch point
2025-07-01 05:48:45.838 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:45.843
2025-07-01 05:48:45.849 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:45.855 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:45.861
2025-07-01 05:48:45.868 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:45.875 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:45.882 alo = 322, ahi = 1101
2025-07-01 05:48:45.890 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:45.899 blo = 322, bhi = 1101
2025-07-01 05:48:45.908
2025-07-01 05:48:45.916 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:45.923 g = []
2025-07-01 05:48:45.929 if alo < ahi:
2025-07-01 05:48:45.935 if blo < bhi:
2025-07-01 05:48:45.940 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:45.951 else:
2025-07-01 05:48:45.957 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:45.963 elif blo < bhi:
2025-07-01 05:48:45.967 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:45.972
2025-07-01 05:48:45.977 > yield from g
2025-07-01 05:48:45.982
2025-07-01 05:48:45.987 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:45.993 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:46.000
2025-07-01 05:48:46.006 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:46.017 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:46.025 alo = 322, ahi = 1101
2025-07-01 05:48:46.036 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:46.045 blo = 322, bhi = 1101
2025-07-01 05:48:46.050
2025-07-01 05:48:46.056 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:46.065 r"""
2025-07-01 05:48:46.074 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:46.080 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:46.086 synch point, and intraline difference marking is done on the
2025-07-01 05:48:46.091 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:46.096
2025-07-01 05:48:46.101 Example:
2025-07-01 05:48:46.106
2025-07-01 05:48:46.111 >>> d = Differ()
2025-07-01 05:48:46.119 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:46.129 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:46.137 >>> print(''.join(results), end="")
2025-07-01 05:48:46.149 - abcDefghiJkl
2025-07-01 05:48:46.166 + abcdefGhijkl
2025-07-01 05:48:46.182 """
2025-07-01 05:48:46.196
2025-07-01 05:48:46.204 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:46.212 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:46.219 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:46.225 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:46.233 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:46.241
2025-07-01 05:48:46.247 # search for the pair that matches best without being identical
2025-07-01 05:48:46.252 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:46.256 # on junk -- unless we have to)
2025-07-01 05:48:46.265 for j in range(blo, bhi):
2025-07-01 05:48:46.278 bj = b[j]
2025-07-01 05:48:46.287 cruncher.set_seq2(bj)
2025-07-01 05:48:46.298 for i in range(alo, ahi):
2025-07-01 05:48:46.308 ai = a[i]
2025-07-01 05:48:46.315 if ai == bj:
2025-07-01 05:48:46.321 if eqi is None:
2025-07-01 05:48:46.328 eqi, eqj = i, j
2025-07-01 05:48:46.334 continue
2025-07-01 05:48:46.341 cruncher.set_seq1(ai)
2025-07-01 05:48:46.347 # computing similarity is expensive, so use the quick
2025-07-01 05:48:46.353 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:46.359 # compares by a factor of 3.
2025-07-01 05:48:46.365 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:46.371 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:46.377 # of the computation is cached by cruncher
2025-07-01 05:48:46.383 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:46.390 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:46.397 cruncher.ratio() > best_ratio:
2025-07-01 05:48:46.403 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:46.409 if best_ratio < cutoff:
2025-07-01 05:48:46.414 # no non-identical "pretty close" pair
2025-07-01 05:48:46.425 if eqi is None:
2025-07-01 05:48:46.434 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:46.441 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:46.448 return
2025-07-01 05:48:46.454 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:46.459 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:46.465 else:
2025-07-01 05:48:46.471 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:46.477 eqi = None
2025-07-01 05:48:46.483
2025-07-01 05:48:46.489 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:46.494 # identical
2025-07-01 05:48:46.500
2025-07-01 05:48:46.513 # pump out diffs from before the synch point
2025-07-01 05:48:46.520 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:46.527
2025-07-01 05:48:46.533 # do intraline marking on the synch pair
2025-07-01 05:48:46.540 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:46.547 if eqi is None:
2025-07-01 05:48:46.554 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:46.561 atags = btags = ""
2025-07-01 05:48:46.568 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:46.575 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:46.581 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:46.588 if tag == 'replace':
2025-07-01 05:48:46.596 atags += '^' * la
2025-07-01 05:48:46.606 btags += '^' * lb
2025-07-01 05:48:46.618 elif tag == 'delete':
2025-07-01 05:48:46.627 atags += '-' * la
2025-07-01 05:48:46.633 elif tag == 'insert':
2025-07-01 05:48:46.642 btags += '+' * lb
2025-07-01 05:48:46.652 elif tag == 'equal':
2025-07-01 05:48:46.660 atags += ' ' * la
2025-07-01 05:48:46.666 btags += ' ' * lb
2025-07-01 05:48:46.675 else:
2025-07-01 05:48:46.681 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:46.687 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:46.695 else:
2025-07-01 05:48:46.705 # the synch pair is identical
2025-07-01 05:48:46.713 yield ' ' + aelt
2025-07-01 05:48:46.718
2025-07-01 05:48:46.724 # pump out diffs from after the synch point
2025-07-01 05:48:46.730 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:46.737
2025-07-01 05:48:46.744 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:46.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:46.759
2025-07-01 05:48:46.769 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:46.783 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:46.793 alo = 323, ahi = 1101
2025-07-01 05:48:46.803 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:46.813 blo = 323, bhi = 1101
2025-07-01 05:48:46.824
2025-07-01 05:48:46.835 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:46.848 g = []
2025-07-01 05:48:46.860 if alo < ahi:
2025-07-01 05:48:46.868 if blo < bhi:
2025-07-01 05:48:46.880 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:46.893 else:
2025-07-01 05:48:46.905 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:46.914 elif blo < bhi:
2025-07-01 05:48:46.921 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:46.927
2025-07-01 05:48:46.932 > yield from g
2025-07-01 05:48:46.938
2025-07-01 05:48:46.943 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:46.952 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:46.965
2025-07-01 05:48:46.973 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:46.982 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:46.988 alo = 323, ahi = 1101
2025-07-01 05:48:46.996 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:47.006 blo = 323, bhi = 1101
2025-07-01 05:48:47.014
2025-07-01 05:48:47.024 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:47.033 r"""
2025-07-01 05:48:47.044 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:47.056 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:47.067 synch point, and intraline difference marking is done on the
2025-07-01 05:48:47.079 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:47.089
2025-07-01 05:48:47.098 Example:
2025-07-01 05:48:47.109
2025-07-01 05:48:47.118 >>> d = Differ()
2025-07-01 05:48:47.127 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:47.138 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:47.149 >>> print(''.join(results), end="")
2025-07-01 05:48:47.161 - abcDefghiJkl
2025-07-01 05:48:47.182 + abcdefGhijkl
2025-07-01 05:48:47.199 """
2025-07-01 05:48:47.206
2025-07-01 05:48:47.212 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:47.218 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:47.225 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:47.232 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:47.238 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:47.243
2025-07-01 05:48:47.249 # search for the pair that matches best without being identical
2025-07-01 05:48:47.255 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:47.260 # on junk -- unless we have to)
2025-07-01 05:48:47.265 for j in range(blo, bhi):
2025-07-01 05:48:47.271 bj = b[j]
2025-07-01 05:48:47.277 cruncher.set_seq2(bj)
2025-07-01 05:48:47.282 for i in range(alo, ahi):
2025-07-01 05:48:47.288 ai = a[i]
2025-07-01 05:48:47.294 if ai == bj:
2025-07-01 05:48:47.300 if eqi is None:
2025-07-01 05:48:47.306 eqi, eqj = i, j
2025-07-01 05:48:47.311 continue
2025-07-01 05:48:47.317 cruncher.set_seq1(ai)
2025-07-01 05:48:47.323 # computing similarity is expensive, so use the quick
2025-07-01 05:48:47.330 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:47.336 # compares by a factor of 3.
2025-07-01 05:48:47.342 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:47.347 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:47.353 # of the computation is cached by cruncher
2025-07-01 05:48:47.360 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:47.365 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:47.371 cruncher.ratio() > best_ratio:
2025-07-01 05:48:47.379 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:47.390 if best_ratio < cutoff:
2025-07-01 05:48:47.400 # no non-identical "pretty close" pair
2025-07-01 05:48:47.407 if eqi is None:
2025-07-01 05:48:47.413 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:47.419 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:47.428 return
2025-07-01 05:48:47.436 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:47.442 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:47.447 else:
2025-07-01 05:48:47.452 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:47.457 eqi = None
2025-07-01 05:48:47.462
2025-07-01 05:48:47.467 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:47.471 # identical
2025-07-01 05:48:47.475
2025-07-01 05:48:47.483 # pump out diffs from before the synch point
2025-07-01 05:48:47.489 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:47.497
2025-07-01 05:48:47.504 # do intraline marking on the synch pair
2025-07-01 05:48:47.511 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:47.517 if eqi is None:
2025-07-01 05:48:47.523 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:47.534 atags = btags = ""
2025-07-01 05:48:47.541 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:47.546 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:47.552 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:47.558 if tag == 'replace':
2025-07-01 05:48:47.566 atags += '^' * la
2025-07-01 05:48:47.574 btags += '^' * lb
2025-07-01 05:48:47.580 elif tag == 'delete':
2025-07-01 05:48:47.586 atags += '-' * la
2025-07-01 05:48:47.591 elif tag == 'insert':
2025-07-01 05:48:47.596 btags += '+' * lb
2025-07-01 05:48:47.602 elif tag == 'equal':
2025-07-01 05:48:47.616 atags += ' ' * la
2025-07-01 05:48:47.627 btags += ' ' * lb
2025-07-01 05:48:47.639 else:
2025-07-01 05:48:47.652 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:47.663 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:47.671 else:
2025-07-01 05:48:47.678 # the synch pair is identical
2025-07-01 05:48:47.685 yield ' ' + aelt
2025-07-01 05:48:47.691
2025-07-01 05:48:47.697 # pump out diffs from after the synch point
2025-07-01 05:48:47.702 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:47.713
2025-07-01 05:48:47.721 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:47.730 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:47.738
2025-07-01 05:48:47.747 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:47.758 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:47.770 alo = 324, ahi = 1101
2025-07-01 05:48:47.783 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:47.793 blo = 324, bhi = 1101
2025-07-01 05:48:47.806
2025-07-01 05:48:47.816 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:47.825 g = []
2025-07-01 05:48:47.832 if alo < ahi:
2025-07-01 05:48:47.842 if blo < bhi:
2025-07-01 05:48:47.853 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:47.863 else:
2025-07-01 05:48:47.876 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:47.888 elif blo < bhi:
2025-07-01 05:48:47.903 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:47.914
2025-07-01 05:48:47.923 > yield from g
2025-07-01 05:48:47.930
2025-07-01 05:48:47.942 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:47.953 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:47.961
2025-07-01 05:48:47.968 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:47.974 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:47.980 alo = 324, ahi = 1101
2025-07-01 05:48:47.986 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:47.991 blo = 324, bhi = 1101
2025-07-01 05:48:47.996
2025-07-01 05:48:48.001 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:48.007 r"""
2025-07-01 05:48:48.012 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:48.016 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:48.021 synch point, and intraline difference marking is done on the
2025-07-01 05:48:48.030 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:48.039
2025-07-01 05:48:48.050 Example:
2025-07-01 05:48:48.063
2025-07-01 05:48:48.074 >>> d = Differ()
2025-07-01 05:48:48.086 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:48.097 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:48.108 >>> print(''.join(results), end="")
2025-07-01 05:48:48.120 - abcDefghiJkl
2025-07-01 05:48:48.143 + abcdefGhijkl
2025-07-01 05:48:48.163 """
2025-07-01 05:48:48.174
2025-07-01 05:48:48.186 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:48.197 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:48.210 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:48.220 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:48.231 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:48.241
2025-07-01 05:48:48.254 # search for the pair that matches best without being identical
2025-07-01 05:48:48.265 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:48.276 # on junk -- unless we have to)
2025-07-01 05:48:48.288 for j in range(blo, bhi):
2025-07-01 05:48:48.301 bj = b[j]
2025-07-01 05:48:48.310 cruncher.set_seq2(bj)
2025-07-01 05:48:48.319 for i in range(alo, ahi):
2025-07-01 05:48:48.326 ai = a[i]
2025-07-01 05:48:48.333 if ai == bj:
2025-07-01 05:48:48.340 if eqi is None:
2025-07-01 05:48:48.347 eqi, eqj = i, j
2025-07-01 05:48:48.355 continue
2025-07-01 05:48:48.366 cruncher.set_seq1(ai)
2025-07-01 05:48:48.374 # computing similarity is expensive, so use the quick
2025-07-01 05:48:48.380 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:48.385 # compares by a factor of 3.
2025-07-01 05:48:48.393 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:48.399 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:48.406 # of the computation is cached by cruncher
2025-07-01 05:48:48.417 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:48.427 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:48.435 cruncher.ratio() > best_ratio:
2025-07-01 05:48:48.442 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:48.448 if best_ratio < cutoff:
2025-07-01 05:48:48.454 # no non-identical "pretty close" pair
2025-07-01 05:48:48.460 if eqi is None:
2025-07-01 05:48:48.466 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:48.471 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:48.479 return
2025-07-01 05:48:48.487 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:48.495 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:48.503 else:
2025-07-01 05:48:48.512 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:48.525 eqi = None
2025-07-01 05:48:48.534
2025-07-01 05:48:48.546 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:48.556 # identical
2025-07-01 05:48:48.562
2025-07-01 05:48:48.570 # pump out diffs from before the synch point
2025-07-01 05:48:48.578 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:48.585
2025-07-01 05:48:48.592 # do intraline marking on the synch pair
2025-07-01 05:48:48.599 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:48.606 if eqi is None:
2025-07-01 05:48:48.618 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:48.627 atags = btags = ""
2025-07-01 05:48:48.635 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:48.643 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:48.654 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:48.665 if tag == 'replace':
2025-07-01 05:48:48.676 atags += '^' * la
2025-07-01 05:48:48.684 btags += '^' * lb
2025-07-01 05:48:48.690 elif tag == 'delete':
2025-07-01 05:48:48.696 atags += '-' * la
2025-07-01 05:48:48.704 elif tag == 'insert':
2025-07-01 05:48:48.711 btags += '+' * lb
2025-07-01 05:48:48.718 elif tag == 'equal':
2025-07-01 05:48:48.728 atags += ' ' * la
2025-07-01 05:48:48.741 btags += ' ' * lb
2025-07-01 05:48:48.751 else:
2025-07-01 05:48:48.760 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:48.767 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:48.775 else:
2025-07-01 05:48:48.786 # the synch pair is identical
2025-07-01 05:48:48.795 yield ' ' + aelt
2025-07-01 05:48:48.802
2025-07-01 05:48:48.809 # pump out diffs from after the synch point
2025-07-01 05:48:48.823 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:48.834
2025-07-01 05:48:48.844 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:48.855 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:48.866
2025-07-01 05:48:48.877 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:48.888 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:48.900 alo = 325, ahi = 1101
2025-07-01 05:48:48.914 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:48.926 blo = 325, bhi = 1101
2025-07-01 05:48:48.936
2025-07-01 05:48:48.949 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:48.957 g = []
2025-07-01 05:48:48.965 if alo < ahi:
2025-07-01 05:48:48.972 if blo < bhi:
2025-07-01 05:48:48.986 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:48.995 else:
2025-07-01 05:48:49.002 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:49.009 elif blo < bhi:
2025-07-01 05:48:49.015 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:49.022
2025-07-01 05:48:49.029 > yield from g
2025-07-01 05:48:49.035
2025-07-01 05:48:49.042 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:49.049 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:49.055
2025-07-01 05:48:49.062 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:49.071 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:49.082 alo = 325, ahi = 1101
2025-07-01 05:48:49.092 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:49.099 blo = 325, bhi = 1101
2025-07-01 05:48:49.104
2025-07-01 05:48:49.109 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:49.114 r"""
2025-07-01 05:48:49.119 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:49.123 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:49.138 synch point, and intraline difference marking is done on the
2025-07-01 05:48:49.150 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:49.158
2025-07-01 05:48:49.166 Example:
2025-07-01 05:48:49.171
2025-07-01 05:48:49.176 >>> d = Differ()
2025-07-01 05:48:49.181 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:49.185 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:49.190 >>> print(''.join(results), end="")
2025-07-01 05:48:49.194 - abcDefghiJkl
2025-07-01 05:48:49.204 + abcdefGhijkl
2025-07-01 05:48:49.225 """
2025-07-01 05:48:49.235
2025-07-01 05:48:49.245 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:49.257 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:49.268 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:49.278 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:49.289 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:49.300
2025-07-01 05:48:49.312 # search for the pair that matches best without being identical
2025-07-01 05:48:49.321 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:49.334 # on junk -- unless we have to)
2025-07-01 05:48:49.343 for j in range(blo, bhi):
2025-07-01 05:48:49.353 bj = b[j]
2025-07-01 05:48:49.365 cruncher.set_seq2(bj)
2025-07-01 05:48:49.371 for i in range(alo, ahi):
2025-07-01 05:48:49.377 ai = a[i]
2025-07-01 05:48:49.382 if ai == bj:
2025-07-01 05:48:49.388 if eqi is None:
2025-07-01 05:48:49.394 eqi, eqj = i, j
2025-07-01 05:48:49.400 continue
2025-07-01 05:48:49.406 cruncher.set_seq1(ai)
2025-07-01 05:48:49.412 # computing similarity is expensive, so use the quick
2025-07-01 05:48:49.419 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:49.424 # compares by a factor of 3.
2025-07-01 05:48:49.430 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:49.435 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:49.440 # of the computation is cached by cruncher
2025-07-01 05:48:49.446 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:49.456 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:49.468 cruncher.ratio() > best_ratio:
2025-07-01 05:48:49.479 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:49.490 if best_ratio < cutoff:
2025-07-01 05:48:49.499 # no non-identical "pretty close" pair
2025-07-01 05:48:49.505 if eqi is None:
2025-07-01 05:48:49.511 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:49.516 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:49.523 return
2025-07-01 05:48:49.531 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:49.538 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:49.549 else:
2025-07-01 05:48:49.559 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:49.568 eqi = None
2025-07-01 05:48:49.577
2025-07-01 05:48:49.586 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:49.594 # identical
2025-07-01 05:48:49.602
2025-07-01 05:48:49.609 # pump out diffs from before the synch point
2025-07-01 05:48:49.622 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:49.633
2025-07-01 05:48:49.641 # do intraline marking on the synch pair
2025-07-01 05:48:49.651 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:49.661 if eqi is None:
2025-07-01 05:48:49.670 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:49.678 atags = btags = ""
2025-07-01 05:48:49.684 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:49.690 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:49.696 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:49.704 if tag == 'replace':
2025-07-01 05:48:49.712 atags += '^' * la
2025-07-01 05:48:49.719 btags += '^' * lb
2025-07-01 05:48:49.725 elif tag == 'delete':
2025-07-01 05:48:49.730 atags += '-' * la
2025-07-01 05:48:49.741 elif tag == 'insert':
2025-07-01 05:48:49.751 btags += '+' * lb
2025-07-01 05:48:49.760 elif tag == 'equal':
2025-07-01 05:48:49.767 atags += ' ' * la
2025-07-01 05:48:49.774 btags += ' ' * lb
2025-07-01 05:48:49.780 else:
2025-07-01 05:48:49.786 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:49.791 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:49.799 else:
2025-07-01 05:48:49.809 # the synch pair is identical
2025-07-01 05:48:49.819 yield ' ' + aelt
2025-07-01 05:48:49.828
2025-07-01 05:48:49.835 # pump out diffs from after the synch point
2025-07-01 05:48:49.843 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:49.850
2025-07-01 05:48:49.857 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:49.869 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:49.878
2025-07-01 05:48:49.886 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:49.895 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:49.908 alo = 326, ahi = 1101
2025-07-01 05:48:49.921 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:49.929 blo = 326, bhi = 1101
2025-07-01 05:48:49.935
2025-07-01 05:48:49.941 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:49.946 g = []
2025-07-01 05:48:49.951 if alo < ahi:
2025-07-01 05:48:49.959 if blo < bhi:
2025-07-01 05:48:49.967 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:49.975 else:
2025-07-01 05:48:49.981 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:49.987 elif blo < bhi:
2025-07-01 05:48:49.995 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:50.005
2025-07-01 05:48:50.014 > yield from g
2025-07-01 05:48:50.021
2025-07-01 05:48:50.028 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:50.036 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:50.043
2025-07-01 05:48:50.049 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:50.059 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:50.071 alo = 326, ahi = 1101
2025-07-01 05:48:50.081 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:50.088 blo = 326, bhi = 1101
2025-07-01 05:48:50.094
2025-07-01 05:48:50.099 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:50.104 r"""
2025-07-01 05:48:50.110 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:50.116 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:50.121 synch point, and intraline difference marking is done on the
2025-07-01 05:48:50.126 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:50.131
2025-07-01 05:48:50.137 Example:
2025-07-01 05:48:50.142
2025-07-01 05:48:50.146 >>> d = Differ()
2025-07-01 05:48:50.152 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:50.157 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:50.162 >>> print(''.join(results), end="")
2025-07-01 05:48:50.167 - abcDefghiJkl
2025-07-01 05:48:50.177 + abcdefGhijkl
2025-07-01 05:48:50.186 """
2025-07-01 05:48:50.191
2025-07-01 05:48:50.196 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:50.201 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:50.205 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:50.210 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:50.215 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:50.220
2025-07-01 05:48:50.225 # search for the pair that matches best without being identical
2025-07-01 05:48:50.229 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:50.234 # on junk -- unless we have to)
2025-07-01 05:48:50.239 for j in range(blo, bhi):
2025-07-01 05:48:50.244 bj = b[j]
2025-07-01 05:48:50.249 cruncher.set_seq2(bj)
2025-07-01 05:48:50.254 for i in range(alo, ahi):
2025-07-01 05:48:50.259 ai = a[i]
2025-07-01 05:48:50.263 if ai == bj:
2025-07-01 05:48:50.268 if eqi is None:
2025-07-01 05:48:50.273 eqi, eqj = i, j
2025-07-01 05:48:50.277 continue
2025-07-01 05:48:50.282 cruncher.set_seq1(ai)
2025-07-01 05:48:50.287 # computing similarity is expensive, so use the quick
2025-07-01 05:48:50.291 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:50.296 # compares by a factor of 3.
2025-07-01 05:48:50.301 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:50.306 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:50.311 # of the computation is cached by cruncher
2025-07-01 05:48:50.316 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:50.320 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:50.325 cruncher.ratio() > best_ratio:
2025-07-01 05:48:50.330 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:50.334 if best_ratio < cutoff:
2025-07-01 05:48:50.339 # no non-identical "pretty close" pair
2025-07-01 05:48:50.344 if eqi is None:
2025-07-01 05:48:50.350 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:50.355 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:50.359 return
2025-07-01 05:48:50.364 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:50.369 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:50.374 else:
2025-07-01 05:48:50.379 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:50.384 eqi = None
2025-07-01 05:48:50.388
2025-07-01 05:48:50.393 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:50.398 # identical
2025-07-01 05:48:50.402
2025-07-01 05:48:50.407 # pump out diffs from before the synch point
2025-07-01 05:48:50.412 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:50.416
2025-07-01 05:48:50.421 # do intraline marking on the synch pair
2025-07-01 05:48:50.426 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:50.430 if eqi is None:
2025-07-01 05:48:50.436 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:50.441 atags = btags = ""
2025-07-01 05:48:50.447 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:50.455 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:50.466 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:50.475 if tag == 'replace':
2025-07-01 05:48:50.482 atags += '^' * la
2025-07-01 05:48:50.492 btags += '^' * lb
2025-07-01 05:48:50.503 elif tag == 'delete':
2025-07-01 05:48:50.510 atags += '-' * la
2025-07-01 05:48:50.516 elif tag == 'insert':
2025-07-01 05:48:50.522 btags += '+' * lb
2025-07-01 05:48:50.528 elif tag == 'equal':
2025-07-01 05:48:50.534 atags += ' ' * la
2025-07-01 05:48:50.541 btags += ' ' * lb
2025-07-01 05:48:50.546 else:
2025-07-01 05:48:50.551 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:50.555 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:50.560 else:
2025-07-01 05:48:50.565 # the synch pair is identical
2025-07-01 05:48:50.569 yield ' ' + aelt
2025-07-01 05:48:50.574
2025-07-01 05:48:50.578 # pump out diffs from after the synch point
2025-07-01 05:48:50.583 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:50.587
2025-07-01 05:48:50.592 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:50.596 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:50.601
2025-07-01 05:48:50.605 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:50.612 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:50.617 alo = 327, ahi = 1101
2025-07-01 05:48:50.622 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:50.626 blo = 327, bhi = 1101
2025-07-01 05:48:50.630
2025-07-01 05:48:50.635 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:50.639 g = []
2025-07-01 05:48:50.644 if alo < ahi:
2025-07-01 05:48:50.648 if blo < bhi:
2025-07-01 05:48:50.652 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:50.657 else:
2025-07-01 05:48:50.663 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:50.669 elif blo < bhi:
2025-07-01 05:48:50.676 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:50.682
2025-07-01 05:48:50.687 > yield from g
2025-07-01 05:48:50.693
2025-07-01 05:48:50.699 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:50.707 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:50.717
2025-07-01 05:48:50.724 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:50.732 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:50.740 alo = 327, ahi = 1101
2025-07-01 05:48:50.748 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:50.754 blo = 327, bhi = 1101
2025-07-01 05:48:50.760
2025-07-01 05:48:50.772 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:50.782 r"""
2025-07-01 05:48:50.793 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:50.804 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:50.817 synch point, and intraline difference marking is done on the
2025-07-01 05:48:50.827 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:50.836
2025-07-01 05:48:50.842 Example:
2025-07-01 05:48:50.848
2025-07-01 05:48:50.854 >>> d = Differ()
2025-07-01 05:48:50.866 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:50.876 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:50.884 >>> print(''.join(results), end="")
2025-07-01 05:48:50.890 - abcDefghiJkl
2025-07-01 05:48:50.902 + abcdefGhijkl
2025-07-01 05:48:50.920 """
2025-07-01 05:48:50.927
2025-07-01 05:48:50.932 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:50.938 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:50.945 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:50.952 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:50.958 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:50.968
2025-07-01 05:48:50.978 # search for the pair that matches best without being identical
2025-07-01 05:48:50.986 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:50.993 # on junk -- unless we have to)
2025-07-01 05:48:50.999 for j in range(blo, bhi):
2025-07-01 05:48:51.005 bj = b[j]
2025-07-01 05:48:51.011 cruncher.set_seq2(bj)
2025-07-01 05:48:51.016 for i in range(alo, ahi):
2025-07-01 05:48:51.021 ai = a[i]
2025-07-01 05:48:51.026 if ai == bj:
2025-07-01 05:48:51.030 if eqi is None:
2025-07-01 05:48:51.035 eqi, eqj = i, j
2025-07-01 05:48:51.040 continue
2025-07-01 05:48:51.045 cruncher.set_seq1(ai)
2025-07-01 05:48:51.052 # computing similarity is expensive, so use the quick
2025-07-01 05:48:51.058 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:51.064 # compares by a factor of 3.
2025-07-01 05:48:51.070 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:51.077 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:51.083 # of the computation is cached by cruncher
2025-07-01 05:48:51.089 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:51.095 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:51.102 cruncher.ratio() > best_ratio:
2025-07-01 05:48:51.112 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:51.124 if best_ratio < cutoff:
2025-07-01 05:48:51.135 # no non-identical "pretty close" pair
2025-07-01 05:48:51.145 if eqi is None:
2025-07-01 05:48:51.157 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:51.170 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:51.180 return
2025-07-01 05:48:51.188 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:51.195 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:51.203 else:
2025-07-01 05:48:51.215 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:51.228 eqi = None
2025-07-01 05:48:51.236
2025-07-01 05:48:51.244 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:51.251 # identical
2025-07-01 05:48:51.256
2025-07-01 05:48:51.262 # pump out diffs from before the synch point
2025-07-01 05:48:51.268 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:51.274
2025-07-01 05:48:51.279 # do intraline marking on the synch pair
2025-07-01 05:48:51.285 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:51.291 if eqi is None:
2025-07-01 05:48:51.297 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:51.302 atags = btags = ""
2025-07-01 05:48:51.308 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:51.314 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:51.321 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:51.326 if tag == 'replace':
2025-07-01 05:48:51.332 atags += '^' * la
2025-07-01 05:48:51.338 btags += '^' * lb
2025-07-01 05:48:51.344 elif tag == 'delete':
2025-07-01 05:48:51.351 atags += '-' * la
2025-07-01 05:48:51.362 elif tag == 'insert':
2025-07-01 05:48:51.371 btags += '+' * lb
2025-07-01 05:48:51.382 elif tag == 'equal':
2025-07-01 05:48:51.391 atags += ' ' * la
2025-07-01 05:48:51.396 btags += ' ' * lb
2025-07-01 05:48:51.402 else:
2025-07-01 05:48:51.408 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:51.414 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:51.419 else:
2025-07-01 05:48:51.425 # the synch pair is identical
2025-07-01 05:48:51.430 yield ' ' + aelt
2025-07-01 05:48:51.436
2025-07-01 05:48:51.442 # pump out diffs from after the synch point
2025-07-01 05:48:51.448 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:51.456
2025-07-01 05:48:51.465 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:51.473 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:51.479
2025-07-01 05:48:51.485 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:51.492 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:51.504 alo = 328, ahi = 1101
2025-07-01 05:48:51.515 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:51.526 blo = 328, bhi = 1101
2025-07-01 05:48:51.538
2025-07-01 05:48:51.550 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:51.559 g = []
2025-07-01 05:48:51.569 if alo < ahi:
2025-07-01 05:48:51.580 if blo < bhi:
2025-07-01 05:48:51.587 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:51.593 else:
2025-07-01 05:48:51.599 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:51.607 elif blo < bhi:
2025-07-01 05:48:51.617 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:51.625
2025-07-01 05:48:51.632 > yield from g
2025-07-01 05:48:51.644
2025-07-01 05:48:51.651 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:51.658 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:51.666
2025-07-01 05:48:51.676 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:51.692 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:51.701 alo = 328, ahi = 1101
2025-07-01 05:48:51.711 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:51.720 blo = 328, bhi = 1101
2025-07-01 05:48:51.732
2025-07-01 05:48:51.742 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:51.753 r"""
2025-07-01 05:48:51.763 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:51.771 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:51.781 synch point, and intraline difference marking is done on the
2025-07-01 05:48:51.791 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:51.797
2025-07-01 05:48:51.803 Example:
2025-07-01 05:48:51.809
2025-07-01 05:48:51.815 >>> d = Differ()
2025-07-01 05:48:51.821 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:51.826 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:51.832 >>> print(''.join(results), end="")
2025-07-01 05:48:51.839 - abcDefghiJkl
2025-07-01 05:48:51.858 + abcdefGhijkl
2025-07-01 05:48:51.874 """
2025-07-01 05:48:51.881
2025-07-01 05:48:51.887 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:51.895 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:51.906 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:51.914 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:51.921 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:51.927
2025-07-01 05:48:51.933 # search for the pair that matches best without being identical
2025-07-01 05:48:51.940 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:51.946 # on junk -- unless we have to)
2025-07-01 05:48:51.951 for j in range(blo, bhi):
2025-07-01 05:48:51.958 bj = b[j]
2025-07-01 05:48:51.968 cruncher.set_seq2(bj)
2025-07-01 05:48:51.982 for i in range(alo, ahi):
2025-07-01 05:48:51.995 ai = a[i]
2025-07-01 05:48:52.006 if ai == bj:
2025-07-01 05:48:52.016 if eqi is None:
2025-07-01 05:48:52.023 eqi, eqj = i, j
2025-07-01 05:48:52.029 continue
2025-07-01 05:48:52.034 cruncher.set_seq1(ai)
2025-07-01 05:48:52.039 # computing similarity is expensive, so use the quick
2025-07-01 05:48:52.045 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:52.053 # compares by a factor of 3.
2025-07-01 05:48:52.059 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:52.066 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:52.073 # of the computation is cached by cruncher
2025-07-01 05:48:52.079 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:52.084 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:52.090 cruncher.ratio() > best_ratio:
2025-07-01 05:48:52.096 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:52.103 if best_ratio < cutoff:
2025-07-01 05:48:52.114 # no non-identical "pretty close" pair
2025-07-01 05:48:52.123 if eqi is None:
2025-07-01 05:48:52.130 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:52.139 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:52.150 return
2025-07-01 05:48:52.159 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:52.166 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:52.174 else:
2025-07-01 05:48:52.185 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:52.195 eqi = None
2025-07-01 05:48:52.206
2025-07-01 05:48:52.216 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:52.223 # identical
2025-07-01 05:48:52.231
2025-07-01 05:48:52.239 # pump out diffs from before the synch point
2025-07-01 05:48:52.251 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:52.260
2025-07-01 05:48:52.268 # do intraline marking on the synch pair
2025-07-01 05:48:52.275 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:52.283 if eqi is None:
2025-07-01 05:48:52.295 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:52.304 atags = btags = ""
2025-07-01 05:48:52.313 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:52.321 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:52.330 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:52.342 if tag == 'replace':
2025-07-01 05:48:52.352 atags += '^' * la
2025-07-01 05:48:52.358 btags += '^' * lb
2025-07-01 05:48:52.363 elif tag == 'delete':
2025-07-01 05:48:52.370 atags += '-' * la
2025-07-01 05:48:52.376 elif tag == 'insert':
2025-07-01 05:48:52.382 btags += '+' * lb
2025-07-01 05:48:52.388 elif tag == 'equal':
2025-07-01 05:48:52.395 atags += ' ' * la
2025-07-01 05:48:52.404 btags += ' ' * lb
2025-07-01 05:48:52.413 else:
2025-07-01 05:48:52.419 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:52.427 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:52.438 else:
2025-07-01 05:48:52.447 # the synch pair is identical
2025-07-01 05:48:52.457 yield ' ' + aelt
2025-07-01 05:48:52.464
2025-07-01 05:48:52.471 # pump out diffs from after the synch point
2025-07-01 05:48:52.479 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:52.490
2025-07-01 05:48:52.502 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:52.510 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:52.520
2025-07-01 05:48:52.530 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:52.539 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:52.552 alo = 329, ahi = 1101
2025-07-01 05:48:52.565 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:52.578 blo = 329, bhi = 1101
2025-07-01 05:48:52.591
2025-07-01 05:48:52.603 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:52.615 g = []
2025-07-01 05:48:52.626 if alo < ahi:
2025-07-01 05:48:52.638 if blo < bhi:
2025-07-01 05:48:52.650 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:52.662 else:
2025-07-01 05:48:52.675 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:52.684 elif blo < bhi:
2025-07-01 05:48:52.691 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:52.698
2025-07-01 05:48:52.707 > yield from g
2025-07-01 05:48:52.714
2025-07-01 05:48:52.725 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:52.734 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:52.744
2025-07-01 05:48:52.754 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:52.764 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:52.771 alo = 329, ahi = 1101
2025-07-01 05:48:52.780 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:52.786 blo = 329, bhi = 1101
2025-07-01 05:48:52.791
2025-07-01 05:48:52.799 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:52.809 r"""
2025-07-01 05:48:52.817 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:52.822 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:52.831 synch point, and intraline difference marking is done on the
2025-07-01 05:48:52.839 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:52.846
2025-07-01 05:48:52.852 Example:
2025-07-01 05:48:52.859
2025-07-01 05:48:52.869 >>> d = Differ()
2025-07-01 05:48:52.877 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:52.883 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:52.890 >>> print(''.join(results), end="")
2025-07-01 05:48:52.899 - abcDefghiJkl
2025-07-01 05:48:52.912 + abcdefGhijkl
2025-07-01 05:48:52.929 """
2025-07-01 05:48:52.936
2025-07-01 05:48:52.943 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:52.950 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:52.961 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:52.969 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:52.975 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:52.982
2025-07-01 05:48:52.989 # search for the pair that matches best without being identical
2025-07-01 05:48:52.995 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:53.001 # on junk -- unless we have to)
2025-07-01 05:48:53.007 for j in range(blo, bhi):
2025-07-01 05:48:53.012 bj = b[j]
2025-07-01 05:48:53.024 cruncher.set_seq2(bj)
2025-07-01 05:48:53.036 for i in range(alo, ahi):
2025-07-01 05:48:53.047 ai = a[i]
2025-07-01 05:48:53.057 if ai == bj:
2025-07-01 05:48:53.069 if eqi is None:
2025-07-01 05:48:53.079 eqi, eqj = i, j
2025-07-01 05:48:53.087 continue
2025-07-01 05:48:53.094 cruncher.set_seq1(ai)
2025-07-01 05:48:53.101 # computing similarity is expensive, so use the quick
2025-07-01 05:48:53.107 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:53.113 # compares by a factor of 3.
2025-07-01 05:48:53.124 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:53.135 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:53.145 # of the computation is cached by cruncher
2025-07-01 05:48:53.153 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:53.160 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:53.166 cruncher.ratio() > best_ratio:
2025-07-01 05:48:53.177 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:53.188 if best_ratio < cutoff:
2025-07-01 05:48:53.199 # no non-identical "pretty close" pair
2025-07-01 05:48:53.211 if eqi is None:
2025-07-01 05:48:53.221 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:53.230 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:53.237 return
2025-07-01 05:48:53.244 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:53.251 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:53.264 else:
2025-07-01 05:48:53.273 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:53.279 eqi = None
2025-07-01 05:48:53.287
2025-07-01 05:48:53.293 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:53.300 # identical
2025-07-01 05:48:53.306
2025-07-01 05:48:53.311 # pump out diffs from before the synch point
2025-07-01 05:48:53.318 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:53.326
2025-07-01 05:48:53.336 # do intraline marking on the synch pair
2025-07-01 05:48:53.349 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:53.360 if eqi is None:
2025-07-01 05:48:53.369 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:53.377 atags = btags = ""
2025-07-01 05:48:53.384 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:53.390 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:53.396 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:53.401 if tag == 'replace':
2025-07-01 05:48:53.407 atags += '^' * la
2025-07-01 05:48:53.416 btags += '^' * lb
2025-07-01 05:48:53.429 elif tag == 'delete':
2025-07-01 05:48:53.439 atags += '-' * la
2025-07-01 05:48:53.448 elif tag == 'insert':
2025-07-01 05:48:53.460 btags += '+' * lb
2025-07-01 05:48:53.470 elif tag == 'equal':
2025-07-01 05:48:53.476 atags += ' ' * la
2025-07-01 05:48:53.482 btags += ' ' * lb
2025-07-01 05:48:53.488 else:
2025-07-01 05:48:53.494 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:53.499 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:53.504 else:
2025-07-01 05:48:53.510 # the synch pair is identical
2025-07-01 05:48:53.520 yield ' ' + aelt
2025-07-01 05:48:53.531
2025-07-01 05:48:53.540 # pump out diffs from after the synch point
2025-07-01 05:48:53.548 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:53.555
2025-07-01 05:48:53.561 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:53.573 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:53.583
2025-07-01 05:48:53.589 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:53.596 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:53.602 alo = 330, ahi = 1101
2025-07-01 05:48:53.609 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:53.616 blo = 330, bhi = 1101
2025-07-01 05:48:53.622
2025-07-01 05:48:53.629 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:53.636 g = []
2025-07-01 05:48:53.643 if alo < ahi:
2025-07-01 05:48:53.651 if blo < bhi:
2025-07-01 05:48:53.661 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:53.671 else:
2025-07-01 05:48:53.681 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:53.688 elif blo < bhi:
2025-07-01 05:48:53.698 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:53.709
2025-07-01 05:48:53.721 > yield from g
2025-07-01 05:48:53.732
2025-07-01 05:48:53.742 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:53.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:53.763
2025-07-01 05:48:53.773 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:53.780 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:53.786 alo = 330, ahi = 1101
2025-07-01 05:48:53.795 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:53.805 blo = 330, bhi = 1101
2025-07-01 05:48:53.814
2025-07-01 05:48:53.822 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:53.828 r"""
2025-07-01 05:48:53.839 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:53.847 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:53.856 synch point, and intraline difference marking is done on the
2025-07-01 05:48:53.866 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:53.874
2025-07-01 05:48:53.881 Example:
2025-07-01 05:48:53.891
2025-07-01 05:48:53.899 >>> d = Differ()
2025-07-01 05:48:53.907 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:53.914 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:53.920 >>> print(''.join(results), end="")
2025-07-01 05:48:53.926 - abcDefghiJkl
2025-07-01 05:48:53.945 + abcdefGhijkl
2025-07-01 05:48:53.960 """
2025-07-01 05:48:53.967
2025-07-01 05:48:53.977 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:53.985 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:53.992 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:53.998 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:54.005 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:54.014
2025-07-01 05:48:54.026 # search for the pair that matches best without being identical
2025-07-01 05:48:54.037 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:54.049 # on junk -- unless we have to)
2025-07-01 05:48:54.061 for j in range(blo, bhi):
2025-07-01 05:48:54.071 bj = b[j]
2025-07-01 05:48:54.084 cruncher.set_seq2(bj)
2025-07-01 05:48:54.096 for i in range(alo, ahi):
2025-07-01 05:48:54.107 ai = a[i]
2025-07-01 05:48:54.117 if ai == bj:
2025-07-01 05:48:54.125 if eqi is None:
2025-07-01 05:48:54.137 eqi, eqj = i, j
2025-07-01 05:48:54.145 continue
2025-07-01 05:48:54.152 cruncher.set_seq1(ai)
2025-07-01 05:48:54.159 # computing similarity is expensive, so use the quick
2025-07-01 05:48:54.167 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:54.178 # compares by a factor of 3.
2025-07-01 05:48:54.187 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:54.194 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:54.199 # of the computation is cached by cruncher
2025-07-01 05:48:54.205 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:54.210 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:54.216 cruncher.ratio() > best_ratio:
2025-07-01 05:48:54.222 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:54.228 if best_ratio < cutoff:
2025-07-01 05:48:54.235 # no non-identical "pretty close" pair
2025-07-01 05:48:54.243 if eqi is None:
2025-07-01 05:48:54.251 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:54.261 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:54.269 return
2025-07-01 05:48:54.275 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:54.280 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:54.286 else:
2025-07-01 05:48:54.293 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:54.300 eqi = None
2025-07-01 05:48:54.306
2025-07-01 05:48:54.312 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:54.318 # identical
2025-07-01 05:48:54.324
2025-07-01 05:48:54.331 # pump out diffs from before the synch point
2025-07-01 05:48:54.336 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:54.342
2025-07-01 05:48:54.350 # do intraline marking on the synch pair
2025-07-01 05:48:54.357 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:54.363 if eqi is None:
2025-07-01 05:48:54.369 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:54.374 atags = btags = ""
2025-07-01 05:48:54.380 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:54.386 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:54.392 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:54.398 if tag == 'replace':
2025-07-01 05:48:54.404 atags += '^' * la
2025-07-01 05:48:54.409 btags += '^' * lb
2025-07-01 05:48:54.415 elif tag == 'delete':
2025-07-01 05:48:54.421 atags += '-' * la
2025-07-01 05:48:54.426 elif tag == 'insert':
2025-07-01 05:48:54.432 btags += '+' * lb
2025-07-01 05:48:54.438 elif tag == 'equal':
2025-07-01 05:48:54.443 atags += ' ' * la
2025-07-01 05:48:54.449 btags += ' ' * lb
2025-07-01 05:48:54.455 else:
2025-07-01 05:48:54.460 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:54.466 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:54.472 else:
2025-07-01 05:48:54.478 # the synch pair is identical
2025-07-01 05:48:54.483 yield ' ' + aelt
2025-07-01 05:48:54.489
2025-07-01 05:48:54.495 # pump out diffs from after the synch point
2025-07-01 05:48:54.501 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:54.506
2025-07-01 05:48:54.512 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:54.519 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:54.525
2025-07-01 05:48:54.531 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:54.537 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:54.543 alo = 331, ahi = 1101
2025-07-01 05:48:54.549 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:54.555 blo = 331, bhi = 1101
2025-07-01 05:48:54.561
2025-07-01 05:48:54.566 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:54.572 g = []
2025-07-01 05:48:54.578 if alo < ahi:
2025-07-01 05:48:54.584 if blo < bhi:
2025-07-01 05:48:54.590 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:54.596 else:
2025-07-01 05:48:54.604 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:54.612 elif blo < bhi:
2025-07-01 05:48:54.620 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:54.625
2025-07-01 05:48:54.631 > yield from g
2025-07-01 05:48:54.636
2025-07-01 05:48:54.641 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:54.647 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:54.653
2025-07-01 05:48:54.661 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:54.672 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:54.681 alo = 331, ahi = 1101
2025-07-01 05:48:54.688 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:54.694 blo = 331, bhi = 1101
2025-07-01 05:48:54.701
2025-07-01 05:48:54.708 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:54.714 r"""
2025-07-01 05:48:54.721 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:54.730 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:54.739 synch point, and intraline difference marking is done on the
2025-07-01 05:48:54.750 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:54.758
2025-07-01 05:48:54.765 Example:
2025-07-01 05:48:54.771
2025-07-01 05:48:54.778 >>> d = Differ()
2025-07-01 05:48:54.789 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:54.799 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:54.806 >>> print(''.join(results), end="")
2025-07-01 05:48:54.814 - abcDefghiJkl
2025-07-01 05:48:54.835 + abcdefGhijkl
2025-07-01 05:48:54.849 """
2025-07-01 05:48:54.855
2025-07-01 05:48:54.861 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:54.873 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:54.881 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:54.887 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:54.898 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:54.913
2025-07-01 05:48:54.924 # search for the pair that matches best without being identical
2025-07-01 05:48:54.931 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:54.937 # on junk -- unless we have to)
2025-07-01 05:48:54.943 for j in range(blo, bhi):
2025-07-01 05:48:54.948 bj = b[j]
2025-07-01 05:48:54.952 cruncher.set_seq2(bj)
2025-07-01 05:48:54.958 for i in range(alo, ahi):
2025-07-01 05:48:54.964 ai = a[i]
2025-07-01 05:48:54.969 if ai == bj:
2025-07-01 05:48:54.975 if eqi is None:
2025-07-01 05:48:54.981 eqi, eqj = i, j
2025-07-01 05:48:54.986 continue
2025-07-01 05:48:54.992 cruncher.set_seq1(ai)
2025-07-01 05:48:54.998 # computing similarity is expensive, so use the quick
2025-07-01 05:48:55.010 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:55.022 # compares by a factor of 3.
2025-07-01 05:48:55.034 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:55.043 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:55.051 # of the computation is cached by cruncher
2025-07-01 05:48:55.058 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:55.069 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:55.079 cruncher.ratio() > best_ratio:
2025-07-01 05:48:55.088 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:55.099 if best_ratio < cutoff:
2025-07-01 05:48:55.110 # no non-identical "pretty close" pair
2025-07-01 05:48:55.120 if eqi is None:
2025-07-01 05:48:55.129 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:55.140 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:55.150 return
2025-07-01 05:48:55.159 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:55.167 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:55.173 else:
2025-07-01 05:48:55.179 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:55.185 eqi = None
2025-07-01 05:48:55.190
2025-07-01 05:48:55.200 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:55.210 # identical
2025-07-01 05:48:55.216
2025-07-01 05:48:55.222 # pump out diffs from before the synch point
2025-07-01 05:48:55.228 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:55.235
2025-07-01 05:48:55.244 # do intraline marking on the synch pair
2025-07-01 05:48:55.256 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:55.265 if eqi is None:
2025-07-01 05:48:55.278 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:55.290 atags = btags = ""
2025-07-01 05:48:55.299 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:55.307 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:55.314 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:55.324 if tag == 'replace':
2025-07-01 05:48:55.334 atags += '^' * la
2025-07-01 05:48:55.342 btags += '^' * lb
2025-07-01 05:48:55.352 elif tag == 'delete':
2025-07-01 05:48:55.365 atags += '-' * la
2025-07-01 05:48:55.376 elif tag == 'insert':
2025-07-01 05:48:55.385 btags += '+' * lb
2025-07-01 05:48:55.393 elif tag == 'equal':
2025-07-01 05:48:55.400 atags += ' ' * la
2025-07-01 05:48:55.405 btags += ' ' * lb
2025-07-01 05:48:55.411 else:
2025-07-01 05:48:55.417 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:55.423 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:55.429 else:
2025-07-01 05:48:55.434 # the synch pair is identical
2025-07-01 05:48:55.445 yield ' ' + aelt
2025-07-01 05:48:55.456
2025-07-01 05:48:55.468 # pump out diffs from after the synch point
2025-07-01 05:48:55.478 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:55.484
2025-07-01 05:48:55.490 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:55.496 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:55.501
2025-07-01 05:48:55.507 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:55.514 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:55.522 alo = 334, ahi = 1101
2025-07-01 05:48:55.529 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:55.535 blo = 334, bhi = 1101
2025-07-01 05:48:55.541
2025-07-01 05:48:55.546 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:55.552 g = []
2025-07-01 05:48:55.560 if alo < ahi:
2025-07-01 05:48:55.572 if blo < bhi:
2025-07-01 05:48:55.579 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:55.585 else:
2025-07-01 05:48:55.591 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:55.597 elif blo < bhi:
2025-07-01 05:48:55.602 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:55.608
2025-07-01 05:48:55.614 > yield from g
2025-07-01 05:48:55.622
2025-07-01 05:48:55.628 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:55.633 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:55.638
2025-07-01 05:48:55.643 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:55.651 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:55.662 alo = 334, ahi = 1101
2025-07-01 05:48:55.670 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:55.678 blo = 334, bhi = 1101
2025-07-01 05:48:55.685
2025-07-01 05:48:55.691 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:55.697 r"""
2025-07-01 05:48:55.702 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:55.708 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:55.713 synch point, and intraline difference marking is done on the
2025-07-01 05:48:55.719 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:55.726
2025-07-01 05:48:55.736 Example:
2025-07-01 05:48:55.744
2025-07-01 05:48:55.751 >>> d = Differ()
2025-07-01 05:48:55.758 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:55.771 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:55.781 >>> print(''.join(results), end="")
2025-07-01 05:48:55.788 - abcDefghiJkl
2025-07-01 05:48:55.806 + abcdefGhijkl
2025-07-01 05:48:55.824 """
2025-07-01 05:48:55.830
2025-07-01 05:48:55.839 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:55.850 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:55.862 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:55.873 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:55.886 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:55.896
2025-07-01 05:48:55.908 # search for the pair that matches best without being identical
2025-07-01 05:48:55.915 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:55.922 # on junk -- unless we have to)
2025-07-01 05:48:55.933 for j in range(blo, bhi):
2025-07-01 05:48:55.942 bj = b[j]
2025-07-01 05:48:55.948 cruncher.set_seq2(bj)
2025-07-01 05:48:55.954 for i in range(alo, ahi):
2025-07-01 05:48:55.960 ai = a[i]
2025-07-01 05:48:55.967 if ai == bj:
2025-07-01 05:48:55.973 if eqi is None:
2025-07-01 05:48:55.980 eqi, eqj = i, j
2025-07-01 05:48:55.987 continue
2025-07-01 05:48:55.995 cruncher.set_seq1(ai)
2025-07-01 05:48:56.006 # computing similarity is expensive, so use the quick
2025-07-01 05:48:56.015 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:56.022 # compares by a factor of 3.
2025-07-01 05:48:56.028 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:56.033 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:56.037 # of the computation is cached by cruncher
2025-07-01 05:48:56.043 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:56.048 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:56.054 cruncher.ratio() > best_ratio:
2025-07-01 05:48:56.061 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:56.068 if best_ratio < cutoff:
2025-07-01 05:48:56.074 # no non-identical "pretty close" pair
2025-07-01 05:48:56.080 if eqi is None:
2025-07-01 05:48:56.084 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:56.089 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:56.094 return
2025-07-01 05:48:56.098 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:56.103 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:56.108 else:
2025-07-01 05:48:56.114 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:56.119 eqi = None
2025-07-01 05:48:56.125
2025-07-01 05:48:56.131 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:56.138 # identical
2025-07-01 05:48:56.148
2025-07-01 05:48:56.156 # pump out diffs from before the synch point
2025-07-01 05:48:56.162 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:56.168
2025-07-01 05:48:56.174 # do intraline marking on the synch pair
2025-07-01 05:48:56.180 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:56.185 if eqi is None:
2025-07-01 05:48:56.191 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:56.198 atags = btags = ""
2025-07-01 05:48:56.205 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:56.210 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:56.215 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:56.220 if tag == 'replace':
2025-07-01 05:48:56.225 atags += '^' * la
2025-07-01 05:48:56.229 btags += '^' * lb
2025-07-01 05:48:56.235 elif tag == 'delete':
2025-07-01 05:48:56.239 atags += '-' * la
2025-07-01 05:48:56.244 elif tag == 'insert':
2025-07-01 05:48:56.249 btags += '+' * lb
2025-07-01 05:48:56.254 elif tag == 'equal':
2025-07-01 05:48:56.259 atags += ' ' * la
2025-07-01 05:48:56.264 btags += ' ' * lb
2025-07-01 05:48:56.269 else:
2025-07-01 05:48:56.274 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:56.279 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:56.285 else:
2025-07-01 05:48:56.290 # the synch pair is identical
2025-07-01 05:48:56.296 yield ' ' + aelt
2025-07-01 05:48:56.302
2025-07-01 05:48:56.311 # pump out diffs from after the synch point
2025-07-01 05:48:56.317 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:56.322
2025-07-01 05:48:56.327 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:56.334 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:56.339
2025-07-01 05:48:56.345 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:56.351 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:56.357 alo = 335, ahi = 1101
2025-07-01 05:48:56.367 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:56.377 blo = 335, bhi = 1101
2025-07-01 05:48:56.383
2025-07-01 05:48:56.390 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:56.396 g = []
2025-07-01 05:48:56.402 if alo < ahi:
2025-07-01 05:48:56.411 if blo < bhi:
2025-07-01 05:48:56.421 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:56.429 else:
2025-07-01 05:48:56.436 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:56.441 elif blo < bhi:
2025-07-01 05:48:56.446 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:56.450
2025-07-01 05:48:56.456 > yield from g
2025-07-01 05:48:56.467
2025-07-01 05:48:56.476 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:56.483 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:56.491
2025-07-01 05:48:56.501 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:56.510 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:56.518 alo = 335, ahi = 1101
2025-07-01 05:48:56.526 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:56.532 blo = 335, bhi = 1101
2025-07-01 05:48:56.538
2025-07-01 05:48:56.545 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:56.550 r"""
2025-07-01 05:48:56.563 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:56.574 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:56.586 synch point, and intraline difference marking is done on the
2025-07-01 05:48:56.598 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:56.607
2025-07-01 05:48:56.613 Example:
2025-07-01 05:48:56.619
2025-07-01 05:48:56.626 >>> d = Differ()
2025-07-01 05:48:56.634 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:56.644 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:56.656 >>> print(''.join(results), end="")
2025-07-01 05:48:56.666 - abcDefghiJkl
2025-07-01 05:48:56.681 + abcdefGhijkl
2025-07-01 05:48:56.700 """
2025-07-01 05:48:56.711
2025-07-01 05:48:56.719 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:56.727 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:56.735 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:56.743 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:56.750 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:56.761
2025-07-01 05:48:56.771 # search for the pair that matches best without being identical
2025-07-01 05:48:56.780 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:56.786 # on junk -- unless we have to)
2025-07-01 05:48:56.794 for j in range(blo, bhi):
2025-07-01 05:48:56.803 bj = b[j]
2025-07-01 05:48:56.813 cruncher.set_seq2(bj)
2025-07-01 05:48:56.821 for i in range(alo, ahi):
2025-07-01 05:48:56.827 ai = a[i]
2025-07-01 05:48:56.833 if ai == bj:
2025-07-01 05:48:56.839 if eqi is None:
2025-07-01 05:48:56.850 eqi, eqj = i, j
2025-07-01 05:48:56.859 continue
2025-07-01 05:48:56.868 cruncher.set_seq1(ai)
2025-07-01 05:48:56.876 # computing similarity is expensive, so use the quick
2025-07-01 05:48:56.885 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:56.897 # compares by a factor of 3.
2025-07-01 05:48:56.907 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:56.916 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:56.923 # of the computation is cached by cruncher
2025-07-01 05:48:56.930 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:56.935 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:56.940 cruncher.ratio() > best_ratio:
2025-07-01 05:48:56.947 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:56.959 if best_ratio < cutoff:
2025-07-01 05:48:56.967 # no non-identical "pretty close" pair
2025-07-01 05:48:56.975 if eqi is None:
2025-07-01 05:48:56.983 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:56.990 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:56.996 return
2025-07-01 05:48:57.002 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:57.006 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:57.013 else:
2025-07-01 05:48:57.018 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:57.027 eqi = None
2025-07-01 05:48:57.031
2025-07-01 05:48:57.036 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:57.041 # identical
2025-07-01 05:48:57.046
2025-07-01 05:48:57.056 # pump out diffs from before the synch point
2025-07-01 05:48:57.067 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:57.076
2025-07-01 05:48:57.084 # do intraline marking on the synch pair
2025-07-01 05:48:57.091 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:57.101 if eqi is None:
2025-07-01 05:48:57.114 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:57.125 atags = btags = ""
2025-07-01 05:48:57.135 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:57.144 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:57.151 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:57.158 if tag == 'replace':
2025-07-01 05:48:57.170 atags += '^' * la
2025-07-01 05:48:57.180 btags += '^' * lb
2025-07-01 05:48:57.189 elif tag == 'delete':
2025-07-01 05:48:57.195 atags += '-' * la
2025-07-01 05:48:57.201 elif tag == 'insert':
2025-07-01 05:48:57.207 btags += '+' * lb
2025-07-01 05:48:57.212 elif tag == 'equal':
2025-07-01 05:48:57.221 atags += ' ' * la
2025-07-01 05:48:57.232 btags += ' ' * lb
2025-07-01 05:48:57.240 else:
2025-07-01 05:48:57.247 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:57.254 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:57.259 else:
2025-07-01 05:48:57.267 # the synch pair is identical
2025-07-01 05:48:57.277 yield ' ' + aelt
2025-07-01 05:48:57.284
2025-07-01 05:48:57.292 # pump out diffs from after the synch point
2025-07-01 05:48:57.299 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:57.307
2025-07-01 05:48:57.318 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:57.327 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:57.339
2025-07-01 05:48:57.351 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:57.361 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:57.369 alo = 336, ahi = 1101
2025-07-01 05:48:57.378 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:57.383 blo = 336, bhi = 1101
2025-07-01 05:48:57.389
2025-07-01 05:48:57.394 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:57.399 g = []
2025-07-01 05:48:57.404 if alo < ahi:
2025-07-01 05:48:57.410 if blo < bhi:
2025-07-01 05:48:57.416 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:57.421 else:
2025-07-01 05:48:57.427 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:57.436 elif blo < bhi:
2025-07-01 05:48:57.447 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:57.455
2025-07-01 05:48:57.463 > yield from g
2025-07-01 05:48:57.470
2025-07-01 05:48:57.477 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:57.483 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:57.490
2025-07-01 05:48:57.499 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:57.508 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:57.514 alo = 336, ahi = 1101
2025-07-01 05:48:57.520 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:57.526 blo = 336, bhi = 1101
2025-07-01 05:48:57.533
2025-07-01 05:48:57.539 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:57.545 r"""
2025-07-01 05:48:57.550 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:57.556 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:57.563 synch point, and intraline difference marking is done on the
2025-07-01 05:48:57.570 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:57.577
2025-07-01 05:48:57.583 Example:
2025-07-01 05:48:57.590
2025-07-01 05:48:57.596 >>> d = Differ()
2025-07-01 05:48:57.603 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:57.612 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:57.620 >>> print(''.join(results), end="")
2025-07-01 05:48:57.627 - abcDefghiJkl
2025-07-01 05:48:57.648 + abcdefGhijkl
2025-07-01 05:48:57.672 """
2025-07-01 05:48:57.683
2025-07-01 05:48:57.695 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:57.703 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:57.710 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:57.715 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:57.721 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:57.726
2025-07-01 05:48:57.732 # search for the pair that matches best without being identical
2025-07-01 05:48:57.738 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:57.744 # on junk -- unless we have to)
2025-07-01 05:48:57.751 for j in range(blo, bhi):
2025-07-01 05:48:57.766 bj = b[j]
2025-07-01 05:48:57.776 cruncher.set_seq2(bj)
2025-07-01 05:48:57.784 for i in range(alo, ahi):
2025-07-01 05:48:57.791 ai = a[i]
2025-07-01 05:48:57.796 if ai == bj:
2025-07-01 05:48:57.803 if eqi is None:
2025-07-01 05:48:57.808 eqi, eqj = i, j
2025-07-01 05:48:57.813 continue
2025-07-01 05:48:57.818 cruncher.set_seq1(ai)
2025-07-01 05:48:57.824 # computing similarity is expensive, so use the quick
2025-07-01 05:48:57.830 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:57.837 # compares by a factor of 3.
2025-07-01 05:48:57.845 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:57.857 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:57.867 # of the computation is cached by cruncher
2025-07-01 05:48:57.874 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:57.881 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:57.893 cruncher.ratio() > best_ratio:
2025-07-01 05:48:57.905 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:57.913 if best_ratio < cutoff:
2025-07-01 05:48:57.921 # no non-identical "pretty close" pair
2025-07-01 05:48:57.928 if eqi is None:
2025-07-01 05:48:57.935 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:57.942 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:57.949 return
2025-07-01 05:48:57.956 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:57.963 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:57.971 else:
2025-07-01 05:48:57.982 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:57.990 eqi = None
2025-07-01 05:48:57.998
2025-07-01 05:48:58.004 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:58.010 # identical
2025-07-01 05:48:58.016
2025-07-01 05:48:58.022 # pump out diffs from before the synch point
2025-07-01 05:48:58.029 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:58.035
2025-07-01 05:48:58.042 # do intraline marking on the synch pair
2025-07-01 05:48:58.051 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:58.060 if eqi is None:
2025-07-01 05:48:58.069 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:58.080 atags = btags = ""
2025-07-01 05:48:58.087 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:58.093 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:58.098 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:58.103 if tag == 'replace':
2025-07-01 05:48:58.107 atags += '^' * la
2025-07-01 05:48:58.113 btags += '^' * lb
2025-07-01 05:48:58.118 elif tag == 'delete':
2025-07-01 05:48:58.124 atags += '-' * la
2025-07-01 05:48:58.130 elif tag == 'insert':
2025-07-01 05:48:58.139 btags += '+' * lb
2025-07-01 05:48:58.146 elif tag == 'equal':
2025-07-01 05:48:58.152 atags += ' ' * la
2025-07-01 05:48:58.157 btags += ' ' * lb
2025-07-01 05:48:58.162 else:
2025-07-01 05:48:58.167 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:58.172 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:58.178 else:
2025-07-01 05:48:58.183 # the synch pair is identical
2025-07-01 05:48:58.190 yield ' ' + aelt
2025-07-01 05:48:58.197
2025-07-01 05:48:58.203 # pump out diffs from after the synch point
2025-07-01 05:48:58.209 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:58.215
2025-07-01 05:48:58.220 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:58.225 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:58.229
2025-07-01 05:48:58.234 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:58.239 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:58.245 alo = 337, ahi = 1101
2025-07-01 05:48:58.250 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:58.261 blo = 337, bhi = 1101
2025-07-01 05:48:58.271
2025-07-01 05:48:58.281 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:58.289 g = []
2025-07-01 05:48:58.297 if alo < ahi:
2025-07-01 05:48:58.305 if blo < bhi:
2025-07-01 05:48:58.313 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:58.320 else:
2025-07-01 05:48:58.330 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:58.341 elif blo < bhi:
2025-07-01 05:48:58.351 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:58.359
2025-07-01 05:48:58.370 > yield from g
2025-07-01 05:48:58.378
2025-07-01 05:48:58.386 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:58.398 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:58.409
2025-07-01 05:48:58.419 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:58.431 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:58.440 alo = 337, ahi = 1101
2025-07-01 05:48:58.452 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:58.461 blo = 337, bhi = 1101
2025-07-01 05:48:58.469
2025-07-01 05:48:58.476 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:58.488 r"""
2025-07-01 05:48:58.497 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:58.507 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:58.520 synch point, and intraline difference marking is done on the
2025-07-01 05:48:58.530 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:58.540
2025-07-01 05:48:58.546 Example:
2025-07-01 05:48:58.552
2025-07-01 05:48:58.559 >>> d = Differ()
2025-07-01 05:48:58.571 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:58.579 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:58.587 >>> print(''.join(results), end="")
2025-07-01 05:48:58.597 - abcDefghiJkl
2025-07-01 05:48:58.620 + abcdefGhijkl
2025-07-01 05:48:58.640 """
2025-07-01 05:48:58.647
2025-07-01 05:48:58.654 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:58.661 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:58.666 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:58.673 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:58.685 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:58.697
2025-07-01 05:48:58.707 # search for the pair that matches best without being identical
2025-07-01 05:48:58.716 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:58.723 # on junk -- unless we have to)
2025-07-01 05:48:58.733 for j in range(blo, bhi):
2025-07-01 05:48:58.746 bj = b[j]
2025-07-01 05:48:58.755 cruncher.set_seq2(bj)
2025-07-01 05:48:58.763 for i in range(alo, ahi):
2025-07-01 05:48:58.771 ai = a[i]
2025-07-01 05:48:58.782 if ai == bj:
2025-07-01 05:48:58.790 if eqi is None:
2025-07-01 05:48:58.799 eqi, eqj = i, j
2025-07-01 05:48:58.811 continue
2025-07-01 05:48:58.821 cruncher.set_seq1(ai)
2025-07-01 05:48:58.831 # computing similarity is expensive, so use the quick
2025-07-01 05:48:58.838 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:58.845 # compares by a factor of 3.
2025-07-01 05:48:58.857 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:58.867 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:58.875 # of the computation is cached by cruncher
2025-07-01 05:48:58.882 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:58.888 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:58.894 cruncher.ratio() > best_ratio:
2025-07-01 05:48:58.899 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:58.905 if best_ratio < cutoff:
2025-07-01 05:48:58.910 # no non-identical "pretty close" pair
2025-07-01 05:48:58.915 if eqi is None:
2025-07-01 05:48:58.920 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:58.926 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:58.931 return
2025-07-01 05:48:58.936 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:58.943 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:58.949 else:
2025-07-01 05:48:58.960 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:58.970 eqi = None
2025-07-01 05:48:58.978
2025-07-01 05:48:58.984 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:58.991 # identical
2025-07-01 05:48:58.998
2025-07-01 05:48:59.009 # pump out diffs from before the synch point
2025-07-01 05:48:59.019 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:59.025
2025-07-01 05:48:59.031 # do intraline marking on the synch pair
2025-07-01 05:48:59.037 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:59.044 if eqi is None:
2025-07-01 05:48:59.051 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:48:59.059 atags = btags = ""
2025-07-01 05:48:59.066 cruncher.set_seqs(aelt, belt)
2025-07-01 05:48:59.077 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:48:59.087 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:48:59.096 if tag == 'replace':
2025-07-01 05:48:59.103 atags += '^' * la
2025-07-01 05:48:59.111 btags += '^' * lb
2025-07-01 05:48:59.118 elif tag == 'delete':
2025-07-01 05:48:59.125 atags += '-' * la
2025-07-01 05:48:59.130 elif tag == 'insert':
2025-07-01 05:48:59.135 btags += '+' * lb
2025-07-01 05:48:59.140 elif tag == 'equal':
2025-07-01 05:48:59.146 atags += ' ' * la
2025-07-01 05:48:59.151 btags += ' ' * lb
2025-07-01 05:48:59.157 else:
2025-07-01 05:48:59.163 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:48:59.169 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:48:59.173 else:
2025-07-01 05:48:59.178 # the synch pair is identical
2025-07-01 05:48:59.183 yield ' ' + aelt
2025-07-01 05:48:59.188
2025-07-01 05:48:59.194 # pump out diffs from after the synch point
2025-07-01 05:48:59.200 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:48:59.207
2025-07-01 05:48:59.214 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:48:59.221 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:59.227
2025-07-01 05:48:59.233 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:59.245 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:59.254 alo = 338, ahi = 1101
2025-07-01 05:48:59.261 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:59.267 blo = 338, bhi = 1101
2025-07-01 05:48:59.273
2025-07-01 05:48:59.280 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:59.286 g = []
2025-07-01 05:48:59.292 if alo < ahi:
2025-07-01 05:48:59.297 if blo < bhi:
2025-07-01 05:48:59.303 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:59.308 else:
2025-07-01 05:48:59.314 g = self._dump('-', a, alo, ahi)
2025-07-01 05:48:59.322 elif blo < bhi:
2025-07-01 05:48:59.327 g = self._dump('+', b, blo, bhi)
2025-07-01 05:48:59.333
2025-07-01 05:48:59.337 > yield from g
2025-07-01 05:48:59.348
2025-07-01 05:48:59.359 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:48:59.369 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:48:59.376
2025-07-01 05:48:59.383 self = <difflib.Differ object at [hex]>
2025-07-01 05:48:59.390 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:48:59.400 alo = 338, ahi = 1101
2025-07-01 05:48:59.409 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:48:59.416 blo = 338, bhi = 1101
2025-07-01 05:48:59.422
2025-07-01 05:48:59.431 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:48:59.440 r"""
2025-07-01 05:48:59.447 When replacing one block of lines with another, search the blocks
2025-07-01 05:48:59.455 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:48:59.461 synch point, and intraline difference marking is done on the
2025-07-01 05:48:59.467 similar pair. Lots of work, but often worth it.
2025-07-01 05:48:59.473
2025-07-01 05:48:59.478 Example:
2025-07-01 05:48:59.484
2025-07-01 05:48:59.491 >>> d = Differ()
2025-07-01 05:48:59.501 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:48:59.509 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:48:59.517 >>> print(''.join(results), end="")
2025-07-01 05:48:59.526 - abcDefghiJkl
2025-07-01 05:48:59.547 + abcdefGhijkl
2025-07-01 05:48:59.563 """
2025-07-01 05:48:59.569
2025-07-01 05:48:59.575 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:48:59.581 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:48:59.587 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:48:59.593 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:48:59.599 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:48:59.604
2025-07-01 05:48:59.613 # search for the pair that matches best without being identical
2025-07-01 05:48:59.626 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:48:59.637 # on junk -- unless we have to)
2025-07-01 05:48:59.647 for j in range(blo, bhi):
2025-07-01 05:48:59.657 bj = b[j]
2025-07-01 05:48:59.669 cruncher.set_seq2(bj)
2025-07-01 05:48:59.682 for i in range(alo, ahi):
2025-07-01 05:48:59.692 ai = a[i]
2025-07-01 05:48:59.700 if ai == bj:
2025-07-01 05:48:59.707 if eqi is None:
2025-07-01 05:48:59.717 eqi, eqj = i, j
2025-07-01 05:48:59.730 continue
2025-07-01 05:48:59.741 cruncher.set_seq1(ai)
2025-07-01 05:48:59.753 # computing similarity is expensive, so use the quick
2025-07-01 05:48:59.763 # upper bounds first -- have seen this speed up messy
2025-07-01 05:48:59.772 # compares by a factor of 3.
2025-07-01 05:48:59.779 # note that ratio() is only expensive to compute the first
2025-07-01 05:48:59.787 # time it's called on a sequence pair; the expensive part
2025-07-01 05:48:59.798 # of the computation is cached by cruncher
2025-07-01 05:48:59.805 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:48:59.812 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:48:59.818 cruncher.ratio() > best_ratio:
2025-07-01 05:48:59.826 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:48:59.835 if best_ratio < cutoff:
2025-07-01 05:48:59.847 # no non-identical "pretty close" pair
2025-07-01 05:48:59.857 if eqi is None:
2025-07-01 05:48:59.866 # no identical pair either -- treat it as a straight replace
2025-07-01 05:48:59.872 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:48:59.879 return
2025-07-01 05:48:59.886 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:48:59.896 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:48:59.905 else:
2025-07-01 05:48:59.912 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:48:59.919 eqi = None
2025-07-01 05:48:59.927
2025-07-01 05:48:59.938 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:48:59.945 # identical
2025-07-01 05:48:59.952
2025-07-01 05:48:59.959 # pump out diffs from before the synch point
2025-07-01 05:48:59.965 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:48:59.971
2025-07-01 05:48:59.976 # do intraline marking on the synch pair
2025-07-01 05:48:59.987 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:48:59.993 if eqi is None:
2025-07-01 05:48:59.999 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:00.006 atags = btags = ""
2025-07-01 05:49:00.015 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:00.027 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:00.037 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:00.046 if tag == 'replace':
2025-07-01 05:49:00.055 atags += '^' * la
2025-07-01 05:49:00.062 btags += '^' * lb
2025-07-01 05:49:00.068 elif tag == 'delete':
2025-07-01 05:49:00.073 atags += '-' * la
2025-07-01 05:49:00.079 elif tag == 'insert':
2025-07-01 05:49:00.086 btags += '+' * lb
2025-07-01 05:49:00.098 elif tag == 'equal':
2025-07-01 05:49:00.104 atags += ' ' * la
2025-07-01 05:49:00.111 btags += ' ' * lb
2025-07-01 05:49:00.118 else:
2025-07-01 05:49:00.124 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:00.138 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:00.149 else:
2025-07-01 05:49:00.161 # the synch pair is identical
2025-07-01 05:49:00.172 yield ' ' + aelt
2025-07-01 05:49:00.183
2025-07-01 05:49:00.191 # pump out diffs from after the synch point
2025-07-01 05:49:00.204 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:00.215
2025-07-01 05:49:00.224 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:00.230 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:00.235
2025-07-01 05:49:00.240 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:00.245 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:00.250 alo = 339, ahi = 1101
2025-07-01 05:49:00.255 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:00.259 blo = 339, bhi = 1101
2025-07-01 05:49:00.264
2025-07-01 05:49:00.269 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:00.276 g = []
2025-07-01 05:49:00.285 if alo < ahi:
2025-07-01 05:49:00.293 if blo < bhi:
2025-07-01 05:49:00.300 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:00.308 else:
2025-07-01 05:49:00.315 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:00.322 elif blo < bhi:
2025-07-01 05:49:00.332 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:00.341
2025-07-01 05:49:00.348 > yield from g
2025-07-01 05:49:00.355
2025-07-01 05:49:00.360 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:00.371 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:00.380
2025-07-01 05:49:00.390 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:00.398 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:00.406 alo = 339, ahi = 1101
2025-07-01 05:49:00.416 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:00.422 blo = 339, bhi = 1101
2025-07-01 05:49:00.431
2025-07-01 05:49:00.442 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:00.454 r"""
2025-07-01 05:49:00.463 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:00.470 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:00.476 synch point, and intraline difference marking is done on the
2025-07-01 05:49:00.483 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:00.488
2025-07-01 05:49:00.494 Example:
2025-07-01 05:49:00.499
2025-07-01 05:49:00.506 >>> d = Differ()
2025-07-01 05:49:00.516 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:00.527 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:00.539 >>> print(''.join(results), end="")
2025-07-01 05:49:00.548 - abcDefghiJkl
2025-07-01 05:49:00.563 + abcdefGhijkl
2025-07-01 05:49:00.588 """
2025-07-01 05:49:00.599
2025-07-01 05:49:00.606 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:00.614 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:00.620 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:00.625 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:00.631 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:00.636
2025-07-01 05:49:00.643 # search for the pair that matches best without being identical
2025-07-01 05:49:00.648 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:00.656 # on junk -- unless we have to)
2025-07-01 05:49:00.663 for j in range(blo, bhi):
2025-07-01 05:49:00.670 bj = b[j]
2025-07-01 05:49:00.679 cruncher.set_seq2(bj)
2025-07-01 05:49:00.690 for i in range(alo, ahi):
2025-07-01 05:49:00.700 ai = a[i]
2025-07-01 05:49:00.709 if ai == bj:
2025-07-01 05:49:00.716 if eqi is None:
2025-07-01 05:49:00.722 eqi, eqj = i, j
2025-07-01 05:49:00.733 continue
2025-07-01 05:49:00.742 cruncher.set_seq1(ai)
2025-07-01 05:49:00.749 # computing similarity is expensive, so use the quick
2025-07-01 05:49:00.756 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:00.762 # compares by a factor of 3.
2025-07-01 05:49:00.772 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:00.781 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:00.787 # of the computation is cached by cruncher
2025-07-01 05:49:00.794 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:00.805 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:00.818 cruncher.ratio() > best_ratio:
2025-07-01 05:49:00.830 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:00.841 if best_ratio < cutoff:
2025-07-01 05:49:00.851 # no non-identical "pretty close" pair
2025-07-01 05:49:00.859 if eqi is None:
2025-07-01 05:49:00.867 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:00.879 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:00.891 return
2025-07-01 05:49:00.902 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:00.912 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:00.926 else:
2025-07-01 05:49:00.940 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:00.952 eqi = None
2025-07-01 05:49:00.964
2025-07-01 05:49:00.977 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:00.988 # identical
2025-07-01 05:49:01.000
2025-07-01 05:49:01.011 # pump out diffs from before the synch point
2025-07-01 05:49:01.019 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:01.031
2025-07-01 05:49:01.041 # do intraline marking on the synch pair
2025-07-01 05:49:01.047 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:01.055 if eqi is None:
2025-07-01 05:49:01.063 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:01.072 atags = btags = ""
2025-07-01 05:49:01.079 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:01.086 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:01.093 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:01.098 if tag == 'replace':
2025-07-01 05:49:01.104 atags += '^' * la
2025-07-01 05:49:01.110 btags += '^' * lb
2025-07-01 05:49:01.121 elif tag == 'delete':
2025-07-01 05:49:01.129 atags += '-' * la
2025-07-01 05:49:01.136 elif tag == 'insert':
2025-07-01 05:49:01.142 btags += '+' * lb
2025-07-01 05:49:01.148 elif tag == 'equal':
2025-07-01 05:49:01.155 atags += ' ' * la
2025-07-01 05:49:01.160 btags += ' ' * lb
2025-07-01 05:49:01.166 else:
2025-07-01 05:49:01.176 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:01.185 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:01.193 else:
2025-07-01 05:49:01.205 # the synch pair is identical
2025-07-01 05:49:01.215 yield ' ' + aelt
2025-07-01 05:49:01.223
2025-07-01 05:49:01.231 # pump out diffs from after the synch point
2025-07-01 05:49:01.237 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:01.251
2025-07-01 05:49:01.262 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:01.268 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:01.282
2025-07-01 05:49:01.293 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:01.301 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:01.307 alo = 340, ahi = 1101
2025-07-01 05:49:01.314 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:01.322 blo = 340, bhi = 1101
2025-07-01 05:49:01.330
2025-07-01 05:49:01.337 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:01.344 g = []
2025-07-01 05:49:01.350 if alo < ahi:
2025-07-01 05:49:01.355 if blo < bhi:
2025-07-01 05:49:01.360 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:01.366 else:
2025-07-01 05:49:01.371 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:01.380 elif blo < bhi:
2025-07-01 05:49:01.390 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:01.397
2025-07-01 05:49:01.404 > yield from g
2025-07-01 05:49:01.410
2025-07-01 05:49:01.417 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:01.429 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:01.440
2025-07-01 05:49:01.447 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:01.453 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:01.458 alo = 340, ahi = 1101
2025-07-01 05:49:01.464 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:01.468 blo = 340, bhi = 1101
2025-07-01 05:49:01.473
2025-07-01 05:49:01.477 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:01.482 r"""
2025-07-01 05:49:01.487 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:01.492 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:01.497 synch point, and intraline difference marking is done on the
2025-07-01 05:49:01.502 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:01.507
2025-07-01 05:49:01.511 Example:
2025-07-01 05:49:01.516
2025-07-01 05:49:01.521 >>> d = Differ()
2025-07-01 05:49:01.527 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:01.532 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:01.537 >>> print(''.join(results), end="")
2025-07-01 05:49:01.542 - abcDefghiJkl
2025-07-01 05:49:01.551 + abcdefGhijkl
2025-07-01 05:49:01.562 """
2025-07-01 05:49:01.568
2025-07-01 05:49:01.574 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:01.582 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:01.589 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:01.596 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:01.602 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:01.607
2025-07-01 05:49:01.614 # search for the pair that matches best without being identical
2025-07-01 05:49:01.620 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:01.626 # on junk -- unless we have to)
2025-07-01 05:49:01.632 for j in range(blo, bhi):
2025-07-01 05:49:01.639 bj = b[j]
2025-07-01 05:49:01.646 cruncher.set_seq2(bj)
2025-07-01 05:49:01.653 for i in range(alo, ahi):
2025-07-01 05:49:01.665 ai = a[i]
2025-07-01 05:49:01.675 if ai == bj:
2025-07-01 05:49:01.686 if eqi is None:
2025-07-01 05:49:01.698 eqi, eqj = i, j
2025-07-01 05:49:01.705 continue
2025-07-01 05:49:01.712 cruncher.set_seq1(ai)
2025-07-01 05:49:01.719 # computing similarity is expensive, so use the quick
2025-07-01 05:49:01.726 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:01.735 # compares by a factor of 3.
2025-07-01 05:49:01.746 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:01.754 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:01.761 # of the computation is cached by cruncher
2025-07-01 05:49:01.767 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:01.774 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:01.782 cruncher.ratio() > best_ratio:
2025-07-01 05:49:01.791 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:01.801 if best_ratio < cutoff:
2025-07-01 05:49:01.810 # no non-identical "pretty close" pair
2025-07-01 05:49:01.821 if eqi is None:
2025-07-01 05:49:01.828 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:01.834 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:01.839 return
2025-07-01 05:49:01.844 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:01.848 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:01.853 else:
2025-07-01 05:49:01.858 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:01.862 eqi = None
2025-07-01 05:49:01.870
2025-07-01 05:49:01.875 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:01.881 # identical
2025-07-01 05:49:01.886
2025-07-01 05:49:01.893 # pump out diffs from before the synch point
2025-07-01 05:49:01.900 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:01.906
2025-07-01 05:49:01.917 # do intraline marking on the synch pair
2025-07-01 05:49:01.925 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:01.932 if eqi is None:
2025-07-01 05:49:01.941 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:01.947 atags = btags = ""
2025-07-01 05:49:01.953 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:01.959 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:01.965 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:01.972 if tag == 'replace':
2025-07-01 05:49:01.979 atags += '^' * la
2025-07-01 05:49:01.987 btags += '^' * lb
2025-07-01 05:49:01.994 elif tag == 'delete':
2025-07-01 05:49:02.001 atags += '-' * la
2025-07-01 05:49:02.009 elif tag == 'insert':
2025-07-01 05:49:02.016 btags += '+' * lb
2025-07-01 05:49:02.024 elif tag == 'equal':
2025-07-01 05:49:02.032 atags += ' ' * la
2025-07-01 05:49:02.039 btags += ' ' * lb
2025-07-01 05:49:02.046 else:
2025-07-01 05:49:02.054 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:02.065 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:02.075 else:
2025-07-01 05:49:02.082 # the synch pair is identical
2025-07-01 05:49:02.088 yield ' ' + aelt
2025-07-01 05:49:02.095
2025-07-01 05:49:02.103 # pump out diffs from after the synch point
2025-07-01 05:49:02.109 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:02.114
2025-07-01 05:49:02.119 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:02.124 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:02.128
2025-07-01 05:49:02.133 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:02.139 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:02.145 alo = 341, ahi = 1101
2025-07-01 05:49:02.158 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:02.167 blo = 341, bhi = 1101
2025-07-01 05:49:02.175
2025-07-01 05:49:02.181 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:02.186 g = []
2025-07-01 05:49:02.191 if alo < ahi:
2025-07-01 05:49:02.197 if blo < bhi:
2025-07-01 05:49:02.203 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:02.209 else:
2025-07-01 05:49:02.216 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:02.223 elif blo < bhi:
2025-07-01 05:49:02.232 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:02.243
2025-07-01 05:49:02.250 > yield from g
2025-07-01 05:49:02.257
2025-07-01 05:49:02.263 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:02.270 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:02.277
2025-07-01 05:49:02.284 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:02.292 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:02.297 alo = 341, ahi = 1101
2025-07-01 05:49:02.310 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:02.319 blo = 341, bhi = 1101
2025-07-01 05:49:02.326
2025-07-01 05:49:02.337 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:02.348 r"""
2025-07-01 05:49:02.356 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:02.362 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:02.368 synch point, and intraline difference marking is done on the
2025-07-01 05:49:02.376 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:02.387
2025-07-01 05:49:02.394 Example:
2025-07-01 05:49:02.400
2025-07-01 05:49:02.407 >>> d = Differ()
2025-07-01 05:49:02.414 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:02.423 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:02.432 >>> print(''.join(results), end="")
2025-07-01 05:49:02.442 - abcDefghiJkl
2025-07-01 05:49:02.458 + abcdefGhijkl
2025-07-01 05:49:02.475 """
2025-07-01 05:49:02.486
2025-07-01 05:49:02.495 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:02.506 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:02.518 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:02.528 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:02.535 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:02.542
2025-07-01 05:49:02.552 # search for the pair that matches best without being identical
2025-07-01 05:49:02.562 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:02.568 # on junk -- unless we have to)
2025-07-01 05:49:02.580 for j in range(blo, bhi):
2025-07-01 05:49:02.589 bj = b[j]
2025-07-01 05:49:02.596 cruncher.set_seq2(bj)
2025-07-01 05:49:02.604 for i in range(alo, ahi):
2025-07-01 05:49:02.610 ai = a[i]
2025-07-01 05:49:02.616 if ai == bj:
2025-07-01 05:49:02.622 if eqi is None:
2025-07-01 05:49:02.627 eqi, eqj = i, j
2025-07-01 05:49:02.632 continue
2025-07-01 05:49:02.637 cruncher.set_seq1(ai)
2025-07-01 05:49:02.642 # computing similarity is expensive, so use the quick
2025-07-01 05:49:02.646 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:02.651 # compares by a factor of 3.
2025-07-01 05:49:02.663 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:02.677 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:02.689 # of the computation is cached by cruncher
2025-07-01 05:49:02.702 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:02.713 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:02.724 cruncher.ratio() > best_ratio:
2025-07-01 05:49:02.732 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:02.738 if best_ratio < cutoff:
2025-07-01 05:49:02.744 # no non-identical "pretty close" pair
2025-07-01 05:49:02.750 if eqi is None:
2025-07-01 05:49:02.754 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:02.759 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:02.765 return
2025-07-01 05:49:02.772 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:02.777 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:02.783 else:
2025-07-01 05:49:02.791 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:02.800 eqi = None
2025-07-01 05:49:02.807
2025-07-01 05:49:02.813 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:02.819 # identical
2025-07-01 05:49:02.826
2025-07-01 05:49:02.836 # pump out diffs from before the synch point
2025-07-01 05:49:02.847 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:02.858
2025-07-01 05:49:02.870 # do intraline marking on the synch pair
2025-07-01 05:49:02.879 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:02.890 if eqi is None:
2025-07-01 05:49:02.899 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:02.906 atags = btags = ""
2025-07-01 05:49:02.913 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:02.918 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:02.927 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:02.939 if tag == 'replace':
2025-07-01 05:49:02.948 atags += '^' * la
2025-07-01 05:49:02.955 btags += '^' * lb
2025-07-01 05:49:02.962 elif tag == 'delete':
2025-07-01 05:49:02.967 atags += '-' * la
2025-07-01 05:49:02.974 elif tag == 'insert':
2025-07-01 05:49:02.983 btags += '+' * lb
2025-07-01 05:49:02.989 elif tag == 'equal':
2025-07-01 05:49:02.995 atags += ' ' * la
2025-07-01 05:49:03.001 btags += ' ' * lb
2025-07-01 05:49:03.008 else:
2025-07-01 05:49:03.016 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:03.026 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:03.036 else:
2025-07-01 05:49:03.046 # the synch pair is identical
2025-07-01 05:49:03.057 yield ' ' + aelt
2025-07-01 05:49:03.066
2025-07-01 05:49:03.078 # pump out diffs from after the synch point
2025-07-01 05:49:03.088 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:03.097
2025-07-01 05:49:03.109 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:03.119 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:03.127
2025-07-01 05:49:03.134 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:03.142 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:03.152 alo = 342, ahi = 1101
2025-07-01 05:49:03.161 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:03.168 blo = 342, bhi = 1101
2025-07-01 05:49:03.177
2025-07-01 05:49:03.188 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:03.195 g = []
2025-07-01 05:49:03.208 if alo < ahi:
2025-07-01 05:49:03.220 if blo < bhi:
2025-07-01 05:49:03.231 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:03.240 else:
2025-07-01 05:49:03.250 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:03.261 elif blo < bhi:
2025-07-01 05:49:03.270 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:03.280
2025-07-01 05:49:03.289 > yield from g
2025-07-01 05:49:03.297
2025-07-01 05:49:03.304 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:03.310 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:03.318
2025-07-01 05:49:03.329 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:03.340 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:03.350 alo = 342, ahi = 1101
2025-07-01 05:49:03.362 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:03.371 blo = 342, bhi = 1101
2025-07-01 05:49:03.383
2025-07-01 05:49:03.393 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:03.403 r"""
2025-07-01 05:49:03.411 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:03.421 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:03.430 synch point, and intraline difference marking is done on the
2025-07-01 05:49:03.439 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:03.447
2025-07-01 05:49:03.454 Example:
2025-07-01 05:49:03.459
2025-07-01 05:49:03.464 >>> d = Differ()
2025-07-01 05:49:03.470 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:03.476 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:03.481 >>> print(''.join(results), end="")
2025-07-01 05:49:03.485 - abcDefghiJkl
2025-07-01 05:49:03.494 + abcdefGhijkl
2025-07-01 05:49:03.502 """
2025-07-01 05:49:03.507
2025-07-01 05:49:03.516 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:03.531 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:03.538 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:03.549 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:03.555 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:03.562
2025-07-01 05:49:03.568 # search for the pair that matches best without being identical
2025-07-01 05:49:03.574 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:03.579 # on junk -- unless we have to)
2025-07-01 05:49:03.584 for j in range(blo, bhi):
2025-07-01 05:49:03.589 bj = b[j]
2025-07-01 05:49:03.595 cruncher.set_seq2(bj)
2025-07-01 05:49:03.604 for i in range(alo, ahi):
2025-07-01 05:49:03.613 ai = a[i]
2025-07-01 05:49:03.621 if ai == bj:
2025-07-01 05:49:03.627 if eqi is None:
2025-07-01 05:49:03.637 eqi, eqj = i, j
2025-07-01 05:49:03.645 continue
2025-07-01 05:49:03.651 cruncher.set_seq1(ai)
2025-07-01 05:49:03.657 # computing similarity is expensive, so use the quick
2025-07-01 05:49:03.662 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:03.674 # compares by a factor of 3.
2025-07-01 05:49:03.684 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:03.693 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:03.700 # of the computation is cached by cruncher
2025-07-01 05:49:03.707 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:03.716 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:03.728 cruncher.ratio() > best_ratio:
2025-07-01 05:49:03.737 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:03.743 if best_ratio < cutoff:
2025-07-01 05:49:03.756 # no non-identical "pretty close" pair
2025-07-01 05:49:03.765 if eqi is None:
2025-07-01 05:49:03.772 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:03.779 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:03.786 return
2025-07-01 05:49:03.797 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:03.806 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:03.814 else:
2025-07-01 05:49:03.822 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:03.829 eqi = None
2025-07-01 05:49:03.837
2025-07-01 05:49:03.845 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:03.858 # identical
2025-07-01 05:49:03.868
2025-07-01 05:49:03.875 # pump out diffs from before the synch point
2025-07-01 05:49:03.882 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:03.889
2025-07-01 05:49:03.896 # do intraline marking on the synch pair
2025-07-01 05:49:03.902 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:03.908 if eqi is None:
2025-07-01 05:49:03.914 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:03.922 atags = btags = ""
2025-07-01 05:49:03.930 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:03.937 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:03.948 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:03.961 if tag == 'replace':
2025-07-01 05:49:03.974 atags += '^' * la
2025-07-01 05:49:03.984 btags += '^' * lb
2025-07-01 05:49:03.991 elif tag == 'delete':
2025-07-01 05:49:03.997 atags += '-' * la
2025-07-01 05:49:04.002 elif tag == 'insert':
2025-07-01 05:49:04.008 btags += '+' * lb
2025-07-01 05:49:04.020 elif tag == 'equal':
2025-07-01 05:49:04.032 atags += ' ' * la
2025-07-01 05:49:04.041 btags += ' ' * lb
2025-07-01 05:49:04.051 else:
2025-07-01 05:49:04.062 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:04.073 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:04.085 else:
2025-07-01 05:49:04.096 # the synch pair is identical
2025-07-01 05:49:04.107 yield ' ' + aelt
2025-07-01 05:49:04.117
2025-07-01 05:49:04.129 # pump out diffs from after the synch point
2025-07-01 05:49:04.139 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:04.147
2025-07-01 05:49:04.154 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:04.165 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:04.177
2025-07-01 05:49:04.188 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:04.201 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:04.212 alo = 343, ahi = 1101
2025-07-01 05:49:04.226 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:04.237 blo = 343, bhi = 1101
2025-07-01 05:49:04.247
2025-07-01 05:49:04.256 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:04.263 g = []
2025-07-01 05:49:04.270 if alo < ahi:
2025-07-01 05:49:04.282 if blo < bhi:
2025-07-01 05:49:04.292 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:04.299 else:
2025-07-01 05:49:04.307 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:04.313 elif blo < bhi:
2025-07-01 05:49:04.319 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:04.329
2025-07-01 05:49:04.342 > yield from g
2025-07-01 05:49:04.351
2025-07-01 05:49:04.359 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:04.364 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:04.369
2025-07-01 05:49:04.374 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:04.380 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:04.392 alo = 343, ahi = 1101
2025-07-01 05:49:04.401 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:04.412 blo = 343, bhi = 1101
2025-07-01 05:49:04.423
2025-07-01 05:49:04.432 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:04.444 r"""
2025-07-01 05:49:04.455 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:04.462 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:04.468 synch point, and intraline difference marking is done on the
2025-07-01 05:49:04.474 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:04.479
2025-07-01 05:49:04.485 Example:
2025-07-01 05:49:04.494
2025-07-01 05:49:04.505 >>> d = Differ()
2025-07-01 05:49:04.514 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:04.523 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:04.530 >>> print(''.join(results), end="")
2025-07-01 05:49:04.540 - abcDefghiJkl
2025-07-01 05:49:04.562 + abcdefGhijkl
2025-07-01 05:49:04.583 """
2025-07-01 05:49:04.591
2025-07-01 05:49:04.599 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:04.607 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:04.618 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:04.628 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:04.639 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:04.648
2025-07-01 05:49:04.656 # search for the pair that matches best without being identical
2025-07-01 05:49:04.663 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:04.670 # on junk -- unless we have to)
2025-07-01 05:49:04.679 for j in range(blo, bhi):
2025-07-01 05:49:04.686 bj = b[j]
2025-07-01 05:49:04.693 cruncher.set_seq2(bj)
2025-07-01 05:49:04.699 for i in range(alo, ahi):
2025-07-01 05:49:04.705 ai = a[i]
2025-07-01 05:49:04.717 if ai == bj:
2025-07-01 05:49:04.730 if eqi is None:
2025-07-01 05:49:04.740 eqi, eqj = i, j
2025-07-01 05:49:04.748 continue
2025-07-01 05:49:04.755 cruncher.set_seq1(ai)
2025-07-01 05:49:04.762 # computing similarity is expensive, so use the quick
2025-07-01 05:49:04.767 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:04.774 # compares by a factor of 3.
2025-07-01 05:49:04.784 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:04.791 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:04.798 # of the computation is cached by cruncher
2025-07-01 05:49:04.806 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:04.813 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:04.820 cruncher.ratio() > best_ratio:
2025-07-01 05:49:04.826 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:04.836 if best_ratio < cutoff:
2025-07-01 05:49:04.845 # no non-identical "pretty close" pair
2025-07-01 05:49:04.852 if eqi is None:
2025-07-01 05:49:04.859 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:04.865 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:04.871 return
2025-07-01 05:49:04.877 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:04.883 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:04.888 else:
2025-07-01 05:49:04.894 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:04.905 eqi = None
2025-07-01 05:49:04.915
2025-07-01 05:49:04.922 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:04.928 # identical
2025-07-01 05:49:04.934
2025-07-01 05:49:04.940 # pump out diffs from before the synch point
2025-07-01 05:49:04.946 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:04.956
2025-07-01 05:49:04.967 # do intraline marking on the synch pair
2025-07-01 05:49:04.977 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:04.987 if eqi is None:
2025-07-01 05:49:05.000 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:05.012 atags = btags = ""
2025-07-01 05:49:05.022 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:05.033 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:05.042 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:05.051 if tag == 'replace':
2025-07-01 05:49:05.058 atags += '^' * la
2025-07-01 05:49:05.067 btags += '^' * lb
2025-07-01 05:49:05.077 elif tag == 'delete':
2025-07-01 05:49:05.086 atags += '-' * la
2025-07-01 05:49:05.098 elif tag == 'insert':
2025-07-01 05:49:05.108 btags += '+' * lb
2025-07-01 05:49:05.115 elif tag == 'equal':
2025-07-01 05:49:05.123 atags += ' ' * la
2025-07-01 05:49:05.133 btags += ' ' * lb
2025-07-01 05:49:05.143 else:
2025-07-01 05:49:05.155 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:05.161 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:05.173 else:
2025-07-01 05:49:05.185 # the synch pair is identical
2025-07-01 05:49:05.196 yield ' ' + aelt
2025-07-01 05:49:05.206
2025-07-01 05:49:05.218 # pump out diffs from after the synch point
2025-07-01 05:49:05.228 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:05.238
2025-07-01 05:49:05.249 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:05.258 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:05.268
2025-07-01 05:49:05.277 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:05.286 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:05.294 alo = 344, ahi = 1101
2025-07-01 05:49:05.303 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:05.316 blo = 344, bhi = 1101
2025-07-01 05:49:05.327
2025-07-01 05:49:05.339 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:05.348 g = []
2025-07-01 05:49:05.361 if alo < ahi:
2025-07-01 05:49:05.371 if blo < bhi:
2025-07-01 05:49:05.379 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:05.385 else:
2025-07-01 05:49:05.392 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:05.397 elif blo < bhi:
2025-07-01 05:49:05.404 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:05.412
2025-07-01 05:49:05.420 > yield from g
2025-07-01 05:49:05.426
2025-07-01 05:49:05.440 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:05.449 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:05.457
2025-07-01 05:49:05.464 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:05.476 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:05.486 alo = 344, ahi = 1101
2025-07-01 05:49:05.497 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:05.508 blo = 344, bhi = 1101
2025-07-01 05:49:05.515
2025-07-01 05:49:05.522 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:05.527 r"""
2025-07-01 05:49:05.534 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:05.541 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:05.553 synch point, and intraline difference marking is done on the
2025-07-01 05:49:05.563 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:05.575
2025-07-01 05:49:05.584 Example:
2025-07-01 05:49:05.591
2025-07-01 05:49:05.598 >>> d = Differ()
2025-07-01 05:49:05.606 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:05.616 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:05.624 >>> print(''.join(results), end="")
2025-07-01 05:49:05.631 - abcDefghiJkl
2025-07-01 05:49:05.650 + abcdefGhijkl
2025-07-01 05:49:05.671 """
2025-07-01 05:49:05.677
2025-07-01 05:49:05.683 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:05.690 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:05.695 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:05.702 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:05.709 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:05.716
2025-07-01 05:49:05.723 # search for the pair that matches best without being identical
2025-07-01 05:49:05.730 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:05.737 # on junk -- unless we have to)
2025-07-01 05:49:05.744 for j in range(blo, bhi):
2025-07-01 05:49:05.751 bj = b[j]
2025-07-01 05:49:05.758 cruncher.set_seq2(bj)
2025-07-01 05:49:05.768 for i in range(alo, ahi):
2025-07-01 05:49:05.778 ai = a[i]
2025-07-01 05:49:05.791 if ai == bj:
2025-07-01 05:49:05.801 if eqi is None:
2025-07-01 05:49:05.812 eqi, eqj = i, j
2025-07-01 05:49:05.825 continue
2025-07-01 05:49:05.837 cruncher.set_seq1(ai)
2025-07-01 05:49:05.844 # computing similarity is expensive, so use the quick
2025-07-01 05:49:05.852 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:05.858 # compares by a factor of 3.
2025-07-01 05:49:05.863 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:05.868 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:05.876 # of the computation is cached by cruncher
2025-07-01 05:49:05.887 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:05.896 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:05.902 cruncher.ratio() > best_ratio:
2025-07-01 05:49:05.908 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:05.915 if best_ratio < cutoff:
2025-07-01 05:49:05.926 # no non-identical "pretty close" pair
2025-07-01 05:49:05.935 if eqi is None:
2025-07-01 05:49:05.943 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:05.955 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:05.964 return
2025-07-01 05:49:05.974 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:05.980 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:05.989 else:
2025-07-01 05:49:05.998 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:06.004 eqi = None
2025-07-01 05:49:06.011
2025-07-01 05:49:06.021 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:06.032 # identical
2025-07-01 05:49:06.039
2025-07-01 05:49:06.044 # pump out diffs from before the synch point
2025-07-01 05:49:06.050 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:06.055
2025-07-01 05:49:06.060 # do intraline marking on the synch pair
2025-07-01 05:49:06.066 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:06.076 if eqi is None:
2025-07-01 05:49:06.087 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:06.097 atags = btags = ""
2025-07-01 05:49:06.110 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:06.119 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:06.125 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:06.134 if tag == 'replace':
2025-07-01 05:49:06.142 atags += '^' * la
2025-07-01 05:49:06.150 btags += '^' * lb
2025-07-01 05:49:06.159 elif tag == 'delete':
2025-07-01 05:49:06.165 atags += '-' * la
2025-07-01 05:49:06.171 elif tag == 'insert':
2025-07-01 05:49:06.175 btags += '+' * lb
2025-07-01 05:49:06.180 elif tag == 'equal':
2025-07-01 05:49:06.185 atags += ' ' * la
2025-07-01 05:49:06.190 btags += ' ' * lb
2025-07-01 05:49:06.196 else:
2025-07-01 05:49:06.202 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:06.208 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:06.213 else:
2025-07-01 05:49:06.225 # the synch pair is identical
2025-07-01 05:49:06.234 yield ' ' + aelt
2025-07-01 05:49:06.243
2025-07-01 05:49:06.253 # pump out diffs from after the synch point
2025-07-01 05:49:06.262 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:06.273
2025-07-01 05:49:06.282 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:06.289 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:06.295
2025-07-01 05:49:06.300 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:06.306 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:06.310 alo = 345, ahi = 1101
2025-07-01 05:49:06.315 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:06.320 blo = 345, bhi = 1101
2025-07-01 05:49:06.325
2025-07-01 05:49:06.329 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:06.333 g = []
2025-07-01 05:49:06.338 if alo < ahi:
2025-07-01 05:49:06.350 if blo < bhi:
2025-07-01 05:49:06.359 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:06.369 else:
2025-07-01 05:49:06.376 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:06.382 elif blo < bhi:
2025-07-01 05:49:06.389 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:06.395
2025-07-01 05:49:06.400 > yield from g
2025-07-01 05:49:06.405
2025-07-01 05:49:06.410 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:06.414 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:06.419
2025-07-01 05:49:06.424 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:06.430 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:06.438 alo = 345, ahi = 1101
2025-07-01 05:49:06.446 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:06.453 blo = 345, bhi = 1101
2025-07-01 05:49:06.458
2025-07-01 05:49:06.468 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:06.475 r"""
2025-07-01 05:49:06.481 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:06.487 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:06.493 synch point, and intraline difference marking is done on the
2025-07-01 05:49:06.499 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:06.504
2025-07-01 05:49:06.510 Example:
2025-07-01 05:49:06.515
2025-07-01 05:49:06.522 >>> d = Differ()
2025-07-01 05:49:06.532 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:06.541 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:06.548 >>> print(''.join(results), end="")
2025-07-01 05:49:06.554 - abcDefghiJkl
2025-07-01 05:49:06.564 + abcdefGhijkl
2025-07-01 05:49:06.580 """
2025-07-01 05:49:06.590
2025-07-01 05:49:06.598 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:06.606 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:06.613 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:06.619 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:06.625 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:06.631
2025-07-01 05:49:06.637 # search for the pair that matches best without being identical
2025-07-01 05:49:06.642 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:06.648 # on junk -- unless we have to)
2025-07-01 05:49:06.654 for j in range(blo, bhi):
2025-07-01 05:49:06.664 bj = b[j]
2025-07-01 05:49:06.674 cruncher.set_seq2(bj)
2025-07-01 05:49:06.681 for i in range(alo, ahi):
2025-07-01 05:49:06.686 ai = a[i]
2025-07-01 05:49:06.693 if ai == bj:
2025-07-01 05:49:06.699 if eqi is None:
2025-07-01 05:49:06.705 eqi, eqj = i, j
2025-07-01 05:49:06.710 continue
2025-07-01 05:49:06.716 cruncher.set_seq1(ai)
2025-07-01 05:49:06.721 # computing similarity is expensive, so use the quick
2025-07-01 05:49:06.727 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:06.733 # compares by a factor of 3.
2025-07-01 05:49:06.740 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:06.746 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:06.752 # of the computation is cached by cruncher
2025-07-01 05:49:06.759 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:06.767 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:06.778 cruncher.ratio() > best_ratio:
2025-07-01 05:49:06.786 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:06.795 if best_ratio < cutoff:
2025-07-01 05:49:06.805 # no non-identical "pretty close" pair
2025-07-01 05:49:06.815 if eqi is None:
2025-07-01 05:49:06.823 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:06.830 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:06.836 return
2025-07-01 05:49:06.842 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:06.848 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:06.854 else:
2025-07-01 05:49:06.860 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:06.867 eqi = None
2025-07-01 05:49:06.877
2025-07-01 05:49:06.886 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:06.897 # identical
2025-07-01 05:49:06.903
2025-07-01 05:49:06.910 # pump out diffs from before the synch point
2025-07-01 05:49:06.919 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:06.926
2025-07-01 05:49:06.933 # do intraline marking on the synch pair
2025-07-01 05:49:06.939 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:06.944 if eqi is None:
2025-07-01 05:49:06.949 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:06.955 atags = btags = ""
2025-07-01 05:49:06.960 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:06.965 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:06.971 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:06.978 if tag == 'replace':
2025-07-01 05:49:06.984 atags += '^' * la
2025-07-01 05:49:06.991 btags += '^' * lb
2025-07-01 05:49:06.996 elif tag == 'delete':
2025-07-01 05:49:07.003 atags += '-' * la
2025-07-01 05:49:07.009 elif tag == 'insert':
2025-07-01 05:49:07.014 btags += '+' * lb
2025-07-01 05:49:07.020 elif tag == 'equal':
2025-07-01 05:49:07.026 atags += ' ' * la
2025-07-01 05:49:07.036 btags += ' ' * lb
2025-07-01 05:49:07.045 else:
2025-07-01 05:49:07.053 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:07.060 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:07.065 else:
2025-07-01 05:49:07.069 # the synch pair is identical
2025-07-01 05:49:07.073 yield ' ' + aelt
2025-07-01 05:49:07.077
2025-07-01 05:49:07.082 # pump out diffs from after the synch point
2025-07-01 05:49:07.086 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:07.090
2025-07-01 05:49:07.095 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:07.099 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:07.103
2025-07-01 05:49:07.108 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:07.113 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:07.117 alo = 346, ahi = 1101
2025-07-01 05:49:07.122 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:07.126 blo = 346, bhi = 1101
2025-07-01 05:49:07.130
2025-07-01 05:49:07.135 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:07.139 g = []
2025-07-01 05:49:07.143 if alo < ahi:
2025-07-01 05:49:07.148 if blo < bhi:
2025-07-01 05:49:07.152 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:07.157 else:
2025-07-01 05:49:07.161 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:07.166 elif blo < bhi:
2025-07-01 05:49:07.170 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:07.174
2025-07-01 05:49:07.179 > yield from g
2025-07-01 05:49:07.183
2025-07-01 05:49:07.187 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:07.192 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:07.196
2025-07-01 05:49:07.200 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:07.205 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:07.210 alo = 346, ahi = 1101
2025-07-01 05:49:07.215 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:07.219 blo = 346, bhi = 1101
2025-07-01 05:49:07.223
2025-07-01 05:49:07.228 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:07.232 r"""
2025-07-01 05:49:07.237 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:07.241 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:07.246 synch point, and intraline difference marking is done on the
2025-07-01 05:49:07.250 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:07.255
2025-07-01 05:49:07.259 Example:
2025-07-01 05:49:07.263
2025-07-01 05:49:07.268 >>> d = Differ()
2025-07-01 05:49:07.272 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:07.277 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:07.281 >>> print(''.join(results), end="")
2025-07-01 05:49:07.286 - abcDefghiJkl
2025-07-01 05:49:07.294 + abcdefGhijkl
2025-07-01 05:49:07.303 """
2025-07-01 05:49:07.308
2025-07-01 05:49:07.312 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:07.317 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:07.322 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:07.326 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:07.330 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:07.335
2025-07-01 05:49:07.339 # search for the pair that matches best without being identical
2025-07-01 05:49:07.344 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:07.348 # on junk -- unless we have to)
2025-07-01 05:49:07.352 for j in range(blo, bhi):
2025-07-01 05:49:07.357 bj = b[j]
2025-07-01 05:49:07.361 cruncher.set_seq2(bj)
2025-07-01 05:49:07.365 for i in range(alo, ahi):
2025-07-01 05:49:07.370 ai = a[i]
2025-07-01 05:49:07.374 if ai == bj:
2025-07-01 05:49:07.378 if eqi is None:
2025-07-01 05:49:07.383 eqi, eqj = i, j
2025-07-01 05:49:07.387 continue
2025-07-01 05:49:07.392 cruncher.set_seq1(ai)
2025-07-01 05:49:07.396 # computing similarity is expensive, so use the quick
2025-07-01 05:49:07.409 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:07.415 # compares by a factor of 3.
2025-07-01 05:49:07.421 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:07.427 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:07.437 # of the computation is cached by cruncher
2025-07-01 05:49:07.444 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:07.450 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:07.456 cruncher.ratio() > best_ratio:
2025-07-01 05:49:07.461 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:07.472 if best_ratio < cutoff:
2025-07-01 05:49:07.482 # no non-identical "pretty close" pair
2025-07-01 05:49:07.488 if eqi is None:
2025-07-01 05:49:07.495 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:07.506 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:07.517 return
2025-07-01 05:49:07.529 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:07.537 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:07.550 else:
2025-07-01 05:49:07.561 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:07.569 eqi = None
2025-07-01 05:49:07.576
2025-07-01 05:49:07.583 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:07.591 # identical
2025-07-01 05:49:07.602
2025-07-01 05:49:07.613 # pump out diffs from before the synch point
2025-07-01 05:49:07.624 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:07.634
2025-07-01 05:49:07.643 # do intraline marking on the synch pair
2025-07-01 05:49:07.651 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:07.662 if eqi is None:
2025-07-01 05:49:07.671 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:07.684 atags = btags = ""
2025-07-01 05:49:07.696 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:07.706 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:07.715 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:07.726 if tag == 'replace':
2025-07-01 05:49:07.739 atags += '^' * la
2025-07-01 05:49:07.747 btags += '^' * lb
2025-07-01 05:49:07.754 elif tag == 'delete':
2025-07-01 05:49:07.767 atags += '-' * la
2025-07-01 05:49:07.777 elif tag == 'insert':
2025-07-01 05:49:07.785 btags += '+' * lb
2025-07-01 05:49:07.792 elif tag == 'equal':
2025-07-01 05:49:07.799 atags += ' ' * la
2025-07-01 05:49:07.805 btags += ' ' * lb
2025-07-01 05:49:07.811 else:
2025-07-01 05:49:07.817 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:07.821 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:07.825 else:
2025-07-01 05:49:07.831 # the synch pair is identical
2025-07-01 05:49:07.837 yield ' ' + aelt
2025-07-01 05:49:07.842
2025-07-01 05:49:07.848 # pump out diffs from after the synch point
2025-07-01 05:49:07.854 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:07.863
2025-07-01 05:49:07.873 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:07.880 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:07.890
2025-07-01 05:49:07.899 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:07.907 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:07.914 alo = 347, ahi = 1101
2025-07-01 05:49:07.921 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:07.933 blo = 347, bhi = 1101
2025-07-01 05:49:07.945
2025-07-01 05:49:07.957 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:07.968 g = []
2025-07-01 05:49:07.976 if alo < ahi:
2025-07-01 05:49:07.983 if blo < bhi:
2025-07-01 05:49:07.988 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:07.994 else:
2025-07-01 05:49:08.004 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:08.013 elif blo < bhi:
2025-07-01 05:49:08.020 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:08.029
2025-07-01 05:49:08.038 > yield from g
2025-07-01 05:49:08.045
2025-07-01 05:49:08.052 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:08.059 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:08.065
2025-07-01 05:49:08.071 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:08.079 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:08.090 alo = 347, ahi = 1101
2025-07-01 05:49:08.098 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:08.104 blo = 347, bhi = 1101
2025-07-01 05:49:08.109
2025-07-01 05:49:08.114 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:08.118 r"""
2025-07-01 05:49:08.124 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:08.129 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:08.135 synch point, and intraline difference marking is done on the
2025-07-01 05:49:08.140 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:08.144
2025-07-01 05:49:08.149 Example:
2025-07-01 05:49:08.155
2025-07-01 05:49:08.160 >>> d = Differ()
2025-07-01 05:49:08.166 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:08.171 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:08.178 >>> print(''.join(results), end="")
2025-07-01 05:49:08.186 - abcDefghiJkl
2025-07-01 05:49:08.204 + abcdefGhijkl
2025-07-01 05:49:08.221 """
2025-07-01 05:49:08.227
2025-07-01 05:49:08.233 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:08.239 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:08.245 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:08.251 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:08.258 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:08.264
2025-07-01 05:49:08.271 # search for the pair that matches best without being identical
2025-07-01 05:49:08.278 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:08.285 # on junk -- unless we have to)
2025-07-01 05:49:08.291 for j in range(blo, bhi):
2025-07-01 05:49:08.298 bj = b[j]
2025-07-01 05:49:08.305 cruncher.set_seq2(bj)
2025-07-01 05:49:08.311 for i in range(alo, ahi):
2025-07-01 05:49:08.318 ai = a[i]
2025-07-01 05:49:08.329 if ai == bj:
2025-07-01 05:49:08.340 if eqi is None:
2025-07-01 05:49:08.350 eqi, eqj = i, j
2025-07-01 05:49:08.357 continue
2025-07-01 05:49:08.364 cruncher.set_seq1(ai)
2025-07-01 05:49:08.377 # computing similarity is expensive, so use the quick
2025-07-01 05:49:08.387 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:08.395 # compares by a factor of 3.
2025-07-01 05:49:08.402 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:08.409 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:08.414 # of the computation is cached by cruncher
2025-07-01 05:49:08.423 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:08.433 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:08.441 cruncher.ratio() > best_ratio:
2025-07-01 05:49:08.447 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:08.452 if best_ratio < cutoff:
2025-07-01 05:49:08.457 # no non-identical "pretty close" pair
2025-07-01 05:49:08.461 if eqi is None:
2025-07-01 05:49:08.466 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:08.470 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:08.475 return
2025-07-01 05:49:08.479 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:08.483 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:08.489 else:
2025-07-01 05:49:08.499 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:08.507 eqi = None
2025-07-01 05:49:08.514
2025-07-01 05:49:08.521 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:08.527 # identical
2025-07-01 05:49:08.533
2025-07-01 05:49:08.539 # pump out diffs from before the synch point
2025-07-01 05:49:08.544 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:08.550
2025-07-01 05:49:08.556 # do intraline marking on the synch pair
2025-07-01 05:49:08.561 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:08.568 if eqi is None:
2025-07-01 05:49:08.574 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:08.581 atags = btags = ""
2025-07-01 05:49:08.588 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:08.594 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:08.601 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:08.608 if tag == 'replace':
2025-07-01 05:49:08.615 atags += '^' * la
2025-07-01 05:49:08.627 btags += '^' * lb
2025-07-01 05:49:08.639 elif tag == 'delete':
2025-07-01 05:49:08.649 atags += '-' * la
2025-07-01 05:49:08.656 elif tag == 'insert':
2025-07-01 05:49:08.662 btags += '+' * lb
2025-07-01 05:49:08.667 elif tag == 'equal':
2025-07-01 05:49:08.673 atags += ' ' * la
2025-07-01 05:49:08.679 btags += ' ' * lb
2025-07-01 05:49:08.684 else:
2025-07-01 05:49:08.689 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:08.697 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:08.706 else:
2025-07-01 05:49:08.711 # the synch pair is identical
2025-07-01 05:49:08.716 yield ' ' + aelt
2025-07-01 05:49:08.721
2025-07-01 05:49:08.727 # pump out diffs from after the synch point
2025-07-01 05:49:08.732 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:08.739
2025-07-01 05:49:08.744 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:08.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:08.756
2025-07-01 05:49:08.762 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:08.769 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:08.775 alo = 348, ahi = 1101
2025-07-01 05:49:08.780 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:08.786 blo = 348, bhi = 1101
2025-07-01 05:49:08.792
2025-07-01 05:49:08.805 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:08.814 g = []
2025-07-01 05:49:08.824 if alo < ahi:
2025-07-01 05:49:08.833 if blo < bhi:
2025-07-01 05:49:08.840 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:08.846 else:
2025-07-01 05:49:08.852 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:08.858 elif blo < bhi:
2025-07-01 05:49:08.864 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:08.868
2025-07-01 05:49:08.873 > yield from g
2025-07-01 05:49:08.879
2025-07-01 05:49:08.885 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:08.890 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:08.896
2025-07-01 05:49:08.902 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:08.910 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:08.921 alo = 348, ahi = 1101
2025-07-01 05:49:08.931 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:08.939 blo = 348, bhi = 1101
2025-07-01 05:49:08.947
2025-07-01 05:49:08.955 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:08.966 r"""
2025-07-01 05:49:08.976 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:08.983 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:08.995 synch point, and intraline difference marking is done on the
2025-07-01 05:49:09.002 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:09.010
2025-07-01 05:49:09.023 Example:
2025-07-01 05:49:09.035
2025-07-01 05:49:09.048 >>> d = Differ()
2025-07-01 05:49:09.059 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:09.070 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:09.078 >>> print(''.join(results), end="")
2025-07-01 05:49:09.084 - abcDefghiJkl
2025-07-01 05:49:09.095 + abcdefGhijkl
2025-07-01 05:49:09.106 """
2025-07-01 05:49:09.113
2025-07-01 05:49:09.119 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:09.125 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:09.131 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:09.136 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:09.142 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:09.148
2025-07-01 05:49:09.154 # search for the pair that matches best without being identical
2025-07-01 05:49:09.160 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:09.166 # on junk -- unless we have to)
2025-07-01 05:49:09.173 for j in range(blo, bhi):
2025-07-01 05:49:09.182 bj = b[j]
2025-07-01 05:49:09.189 cruncher.set_seq2(bj)
2025-07-01 05:49:09.195 for i in range(alo, ahi):
2025-07-01 05:49:09.204 ai = a[i]
2025-07-01 05:49:09.219 if ai == bj:
2025-07-01 05:49:09.227 if eqi is None:
2025-07-01 05:49:09.234 eqi, eqj = i, j
2025-07-01 05:49:09.241 continue
2025-07-01 05:49:09.249 cruncher.set_seq1(ai)
2025-07-01 05:49:09.255 # computing similarity is expensive, so use the quick
2025-07-01 05:49:09.262 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:09.268 # compares by a factor of 3.
2025-07-01 05:49:09.274 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:09.281 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:09.287 # of the computation is cached by cruncher
2025-07-01 05:49:09.294 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:09.301 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:09.309 cruncher.ratio() > best_ratio:
2025-07-01 05:49:09.319 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:09.332 if best_ratio < cutoff:
2025-07-01 05:49:09.343 # no non-identical "pretty close" pair
2025-07-01 05:49:09.352 if eqi is None:
2025-07-01 05:49:09.361 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:09.367 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:09.374 return
2025-07-01 05:49:09.385 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:09.393 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:09.402 else:
2025-07-01 05:49:09.413 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:09.420 eqi = None
2025-07-01 05:49:09.427
2025-07-01 05:49:09.437 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:09.448 # identical
2025-07-01 05:49:09.458
2025-07-01 05:49:09.469 # pump out diffs from before the synch point
2025-07-01 05:49:09.478 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:09.486
2025-07-01 05:49:09.498 # do intraline marking on the synch pair
2025-07-01 05:49:09.507 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:09.515 if eqi is None:
2025-07-01 05:49:09.522 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:09.528 atags = btags = ""
2025-07-01 05:49:09.534 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:09.540 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:09.546 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:09.551 if tag == 'replace':
2025-07-01 05:49:09.556 atags += '^' * la
2025-07-01 05:49:09.562 btags += '^' * lb
2025-07-01 05:49:09.567 elif tag == 'delete':
2025-07-01 05:49:09.572 atags += '-' * la
2025-07-01 05:49:09.577 elif tag == 'insert':
2025-07-01 05:49:09.582 btags += '+' * lb
2025-07-01 05:49:09.588 elif tag == 'equal':
2025-07-01 05:49:09.593 atags += ' ' * la
2025-07-01 05:49:09.599 btags += ' ' * lb
2025-07-01 05:49:09.604 else:
2025-07-01 05:49:09.611 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:09.618 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:09.624 else:
2025-07-01 05:49:09.629 # the synch pair is identical
2025-07-01 05:49:09.634 yield ' ' + aelt
2025-07-01 05:49:09.640
2025-07-01 05:49:09.652 # pump out diffs from after the synch point
2025-07-01 05:49:09.662 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:09.670
2025-07-01 05:49:09.678 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:09.687 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:09.699
2025-07-01 05:49:09.708 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:09.716 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:09.722 alo = 349, ahi = 1101
2025-07-01 05:49:09.730 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:09.739 blo = 349, bhi = 1101
2025-07-01 05:49:09.750
2025-07-01 05:49:09.762 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:09.775 g = []
2025-07-01 05:49:09.783 if alo < ahi:
2025-07-01 05:49:09.791 if blo < bhi:
2025-07-01 05:49:09.802 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:09.813 else:
2025-07-01 05:49:09.826 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:09.838 elif blo < bhi:
2025-07-01 05:49:09.852 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:09.863
2025-07-01 05:49:09.871 > yield from g
2025-07-01 05:49:09.878
2025-07-01 05:49:09.884 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:09.890 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:09.895
2025-07-01 05:49:09.903 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:09.913 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:09.920 alo = 349, ahi = 1101
2025-07-01 05:49:09.928 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:09.934 blo = 349, bhi = 1101
2025-07-01 05:49:09.941
2025-07-01 05:49:09.953 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:09.965 r"""
2025-07-01 05:49:09.977 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:09.987 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:09.999 synch point, and intraline difference marking is done on the
2025-07-01 05:49:10.008 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:10.015
2025-07-01 05:49:10.022 Example:
2025-07-01 05:49:10.032
2025-07-01 05:49:10.042 >>> d = Differ()
2025-07-01 05:49:10.050 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:10.057 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:10.069 >>> print(''.join(results), end="")
2025-07-01 05:49:10.079 - abcDefghiJkl
2025-07-01 05:49:10.102 + abcdefGhijkl
2025-07-01 05:49:10.121 """
2025-07-01 05:49:10.128
2025-07-01 05:49:10.135 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:10.146 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:10.155 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:10.161 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:10.169 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:10.176
2025-07-01 05:49:10.183 # search for the pair that matches best without being identical
2025-07-01 05:49:10.190 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:10.197 # on junk -- unless we have to)
2025-07-01 05:49:10.203 for j in range(blo, bhi):
2025-07-01 05:49:10.210 bj = b[j]
2025-07-01 05:49:10.217 cruncher.set_seq2(bj)
2025-07-01 05:49:10.223 for i in range(alo, ahi):
2025-07-01 05:49:10.229 ai = a[i]
2025-07-01 05:49:10.236 if ai == bj:
2025-07-01 05:49:10.243 if eqi is None:
2025-07-01 05:49:10.250 eqi, eqj = i, j
2025-07-01 05:49:10.261 continue
2025-07-01 05:49:10.269 cruncher.set_seq1(ai)
2025-07-01 05:49:10.276 # computing similarity is expensive, so use the quick
2025-07-01 05:49:10.282 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:10.287 # compares by a factor of 3.
2025-07-01 05:49:10.294 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:10.301 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:10.307 # of the computation is cached by cruncher
2025-07-01 05:49:10.313 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:10.319 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:10.325 cruncher.ratio() > best_ratio:
2025-07-01 05:49:10.331 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:10.340 if best_ratio < cutoff:
2025-07-01 05:49:10.349 # no non-identical "pretty close" pair
2025-07-01 05:49:10.359 if eqi is None:
2025-07-01 05:49:10.370 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:10.382 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:10.391 return
2025-07-01 05:49:10.398 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:10.405 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:10.412 else:
2025-07-01 05:49:10.419 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:10.426 eqi = None
2025-07-01 05:49:10.432
2025-07-01 05:49:10.438 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:10.449 # identical
2025-07-01 05:49:10.459
2025-07-01 05:49:10.467 # pump out diffs from before the synch point
2025-07-01 05:49:10.475 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:10.482
2025-07-01 05:49:10.489 # do intraline marking on the synch pair
2025-07-01 05:49:10.496 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:10.504 if eqi is None:
2025-07-01 05:49:10.511 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:10.517 atags = btags = ""
2025-07-01 05:49:10.530 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:10.542 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:10.554 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:10.561 if tag == 'replace':
2025-07-01 05:49:10.569 atags += '^' * la
2025-07-01 05:49:10.575 btags += '^' * lb
2025-07-01 05:49:10.581 elif tag == 'delete':
2025-07-01 05:49:10.588 atags += '-' * la
2025-07-01 05:49:10.594 elif tag == 'insert':
2025-07-01 05:49:10.600 btags += '+' * lb
2025-07-01 05:49:10.606 elif tag == 'equal':
2025-07-01 05:49:10.612 atags += ' ' * la
2025-07-01 05:49:10.618 btags += ' ' * lb
2025-07-01 05:49:10.623 else:
2025-07-01 05:49:10.635 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:10.643 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:10.651 else:
2025-07-01 05:49:10.661 # the synch pair is identical
2025-07-01 05:49:10.669 yield ' ' + aelt
2025-07-01 05:49:10.677
2025-07-01 05:49:10.683 # pump out diffs from after the synch point
2025-07-01 05:49:10.691 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:10.701
2025-07-01 05:49:10.709 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:10.716 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:10.722
2025-07-01 05:49:10.732 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:10.742 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:10.751 alo = 350, ahi = 1101
2025-07-01 05:49:10.759 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:10.767 blo = 350, bhi = 1101
2025-07-01 05:49:10.778
2025-07-01 05:49:10.786 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:10.797 g = []
2025-07-01 05:49:10.807 if alo < ahi:
2025-07-01 05:49:10.815 if blo < bhi:
2025-07-01 05:49:10.822 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:10.829 else:
2025-07-01 05:49:10.839 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:10.847 elif blo < bhi:
2025-07-01 05:49:10.854 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:10.865
2025-07-01 05:49:10.875 > yield from g
2025-07-01 05:49:10.883
2025-07-01 05:49:10.889 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:10.896 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:10.902
2025-07-01 05:49:10.911 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:10.923 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:10.932 alo = 350, ahi = 1101
2025-07-01 05:49:10.939 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:10.944 blo = 350, bhi = 1101
2025-07-01 05:49:10.949
2025-07-01 05:49:10.953 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:10.958 r"""
2025-07-01 05:49:10.963 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:10.968 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:10.975 synch point, and intraline difference marking is done on the
2025-07-01 05:49:10.981 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:10.986
2025-07-01 05:49:10.992 Example:
2025-07-01 05:49:10.998
2025-07-01 05:49:11.007 >>> d = Differ()
2025-07-01 05:49:11.014 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:11.021 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:11.027 >>> print(''.join(results), end="")
2025-07-01 05:49:11.039 - abcDefghiJkl
2025-07-01 05:49:11.061 + abcdefGhijkl
2025-07-01 05:49:11.077 """
2025-07-01 05:49:11.089
2025-07-01 05:49:11.098 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:11.106 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:11.115 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:11.126 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:11.135 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:11.145
2025-07-01 05:49:11.154 # search for the pair that matches best without being identical
2025-07-01 05:49:11.162 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:11.169 # on junk -- unless we have to)
2025-07-01 05:49:11.179 for j in range(blo, bhi):
2025-07-01 05:49:11.187 bj = b[j]
2025-07-01 05:49:11.194 cruncher.set_seq2(bj)
2025-07-01 05:49:11.200 for i in range(alo, ahi):
2025-07-01 05:49:11.205 ai = a[i]
2025-07-01 05:49:11.211 if ai == bj:
2025-07-01 05:49:11.216 if eqi is None:
2025-07-01 05:49:11.222 eqi, eqj = i, j
2025-07-01 05:49:11.228 continue
2025-07-01 05:49:11.233 cruncher.set_seq1(ai)
2025-07-01 05:49:11.239 # computing similarity is expensive, so use the quick
2025-07-01 05:49:11.252 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:11.261 # compares by a factor of 3.
2025-07-01 05:49:11.268 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:11.274 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:11.284 # of the computation is cached by cruncher
2025-07-01 05:49:11.294 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:11.300 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:11.307 cruncher.ratio() > best_ratio:
2025-07-01 05:49:11.312 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:11.318 if best_ratio < cutoff:
2025-07-01 05:49:11.327 # no non-identical "pretty close" pair
2025-07-01 05:49:11.338 if eqi is None:
2025-07-01 05:49:11.345 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:11.352 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:11.359 return
2025-07-01 05:49:11.368 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:11.379 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:11.389 else:
2025-07-01 05:49:11.397 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:11.403 eqi = None
2025-07-01 05:49:11.408
2025-07-01 05:49:11.414 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:11.422 # identical
2025-07-01 05:49:11.426
2025-07-01 05:49:11.431 # pump out diffs from before the synch point
2025-07-01 05:49:11.443 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:11.452
2025-07-01 05:49:11.462 # do intraline marking on the synch pair
2025-07-01 05:49:11.470 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:11.477 if eqi is None:
2025-07-01 05:49:11.484 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:11.491 atags = btags = ""
2025-07-01 05:49:11.501 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:11.512 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:11.521 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:11.533 if tag == 'replace':
2025-07-01 05:49:11.547 atags += '^' * la
2025-07-01 05:49:11.560 btags += '^' * lb
2025-07-01 05:49:11.572 elif tag == 'delete':
2025-07-01 05:49:11.580 atags += '-' * la
2025-07-01 05:49:11.587 elif tag == 'insert':
2025-07-01 05:49:11.595 btags += '+' * lb
2025-07-01 05:49:11.608 elif tag == 'equal':
2025-07-01 05:49:11.617 atags += ' ' * la
2025-07-01 05:49:11.626 btags += ' ' * lb
2025-07-01 05:49:11.638 else:
2025-07-01 05:49:11.650 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:11.660 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:11.671 else:
2025-07-01 05:49:11.683 # the synch pair is identical
2025-07-01 05:49:11.697 yield ' ' + aelt
2025-07-01 05:49:11.710
2025-07-01 05:49:11.719 # pump out diffs from after the synch point
2025-07-01 05:49:11.725 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:11.730
2025-07-01 05:49:11.740 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:11.755 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:11.767
2025-07-01 05:49:11.780 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:11.792 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:11.801 alo = 351, ahi = 1101
2025-07-01 05:49:11.811 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:11.817 blo = 351, bhi = 1101
2025-07-01 05:49:11.829
2025-07-01 05:49:11.838 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:11.847 g = []
2025-07-01 05:49:11.856 if alo < ahi:
2025-07-01 05:49:11.863 if blo < bhi:
2025-07-01 05:49:11.870 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:11.876 else:
2025-07-01 05:49:11.882 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:11.892 elif blo < bhi:
2025-07-01 05:49:11.902 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:11.911
2025-07-01 05:49:11.922 > yield from g
2025-07-01 05:49:11.934
2025-07-01 05:49:11.941 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:11.946 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:11.950
2025-07-01 05:49:11.962 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:11.973 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:11.985 alo = 351, ahi = 1101
2025-07-01 05:49:11.996 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:12.004 blo = 351, bhi = 1101
2025-07-01 05:49:12.012
2025-07-01 05:49:12.026 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:12.038 r"""
2025-07-01 05:49:12.046 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:12.053 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:12.059 synch point, and intraline difference marking is done on the
2025-07-01 05:49:12.065 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:12.071
2025-07-01 05:49:12.077 Example:
2025-07-01 05:49:12.082
2025-07-01 05:49:12.088 >>> d = Differ()
2025-07-01 05:49:12.099 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:12.108 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:12.118 >>> print(''.join(results), end="")
2025-07-01 05:49:12.126 - abcDefghiJkl
2025-07-01 05:49:12.139 + abcdefGhijkl
2025-07-01 05:49:12.152 """
2025-07-01 05:49:12.158
2025-07-01 05:49:12.165 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:12.173 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:12.179 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:12.187 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:12.199 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:12.204
2025-07-01 05:49:12.209 # search for the pair that matches best without being identical
2025-07-01 05:49:12.214 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:12.219 # on junk -- unless we have to)
2025-07-01 05:49:12.225 for j in range(blo, bhi):
2025-07-01 05:49:12.231 bj = b[j]
2025-07-01 05:49:12.236 cruncher.set_seq2(bj)
2025-07-01 05:49:12.242 for i in range(alo, ahi):
2025-07-01 05:49:12.252 ai = a[i]
2025-07-01 05:49:12.260 if ai == bj:
2025-07-01 05:49:12.269 if eqi is None:
2025-07-01 05:49:12.275 eqi, eqj = i, j
2025-07-01 05:49:12.281 continue
2025-07-01 05:49:12.287 cruncher.set_seq1(ai)
2025-07-01 05:49:12.293 # computing similarity is expensive, so use the quick
2025-07-01 05:49:12.299 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:12.306 # compares by a factor of 3.
2025-07-01 05:49:12.317 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:12.325 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:12.332 # of the computation is cached by cruncher
2025-07-01 05:49:12.338 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:12.344 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:12.353 cruncher.ratio() > best_ratio:
2025-07-01 05:49:12.360 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:12.369 if best_ratio < cutoff:
2025-07-01 05:49:12.381 # no non-identical "pretty close" pair
2025-07-01 05:49:12.388 if eqi is None:
2025-07-01 05:49:12.396 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:12.403 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:12.411 return
2025-07-01 05:49:12.423 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:12.433 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:12.441 else:
2025-07-01 05:49:12.447 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:12.454 eqi = None
2025-07-01 05:49:12.460
2025-07-01 05:49:12.467 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:12.475 # identical
2025-07-01 05:49:12.486
2025-07-01 05:49:12.495 # pump out diffs from before the synch point
2025-07-01 05:49:12.502 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:12.508
2025-07-01 05:49:12.519 # do intraline marking on the synch pair
2025-07-01 05:49:12.529 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:12.541 if eqi is None:
2025-07-01 05:49:12.551 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:12.559 atags = btags = ""
2025-07-01 05:49:12.566 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:12.576 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:12.587 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:12.597 if tag == 'replace':
2025-07-01 05:49:12.605 atags += '^' * la
2025-07-01 05:49:12.612 btags += '^' * lb
2025-07-01 05:49:12.623 elif tag == 'delete':
2025-07-01 05:49:12.636 atags += '-' * la
2025-07-01 05:49:12.648 elif tag == 'insert':
2025-07-01 05:49:12.660 btags += '+' * lb
2025-07-01 05:49:12.669 elif tag == 'equal':
2025-07-01 05:49:12.675 atags += ' ' * la
2025-07-01 05:49:12.682 btags += ' ' * lb
2025-07-01 05:49:12.689 else:
2025-07-01 05:49:12.699 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:12.711 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:12.720 else:
2025-07-01 05:49:12.727 # the synch pair is identical
2025-07-01 05:49:12.738 yield ' ' + aelt
2025-07-01 05:49:12.749
2025-07-01 05:49:12.761 # pump out diffs from after the synch point
2025-07-01 05:49:12.773 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:12.783
2025-07-01 05:49:12.792 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:12.805 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:12.816
2025-07-01 05:49:12.827 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:12.841 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:12.853 alo = 352, ahi = 1101
2025-07-01 05:49:12.865 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:12.876 blo = 352, bhi = 1101
2025-07-01 05:49:12.888
2025-07-01 05:49:12.900 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:12.912 g = []
2025-07-01 05:49:12.923 if alo < ahi:
2025-07-01 05:49:12.932 if blo < bhi:
2025-07-01 05:49:12.940 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:12.946 else:
2025-07-01 05:49:12.954 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:12.964 elif blo < bhi:
2025-07-01 05:49:12.975 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:12.984
2025-07-01 05:49:12.992 > yield from g
2025-07-01 05:49:12.999
2025-07-01 05:49:13.007 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:13.016 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:13.022
2025-07-01 05:49:13.028 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:13.040 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:13.051 alo = 352, ahi = 1101
2025-07-01 05:49:13.059 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:13.068 blo = 352, bhi = 1101
2025-07-01 05:49:13.079
2025-07-01 05:49:13.087 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:13.093 r"""
2025-07-01 05:49:13.100 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:13.107 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:13.114 synch point, and intraline difference marking is done on the
2025-07-01 05:49:13.120 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:13.126
2025-07-01 05:49:13.132 Example:
2025-07-01 05:49:13.138
2025-07-01 05:49:13.144 >>> d = Differ()
2025-07-01 05:49:13.150 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:13.157 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:13.163 >>> print(''.join(results), end="")
2025-07-01 05:49:13.171 - abcDefghiJkl
2025-07-01 05:49:13.192 + abcdefGhijkl
2025-07-01 05:49:13.207 """
2025-07-01 05:49:13.214
2025-07-01 05:49:13.224 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:13.237 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:13.247 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:13.255 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:13.267 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:13.277
2025-07-01 05:49:13.285 # search for the pair that matches best without being identical
2025-07-01 05:49:13.293 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:13.300 # on junk -- unless we have to)
2025-07-01 05:49:13.307 for j in range(blo, bhi):
2025-07-01 05:49:13.314 bj = b[j]
2025-07-01 05:49:13.321 cruncher.set_seq2(bj)
2025-07-01 05:49:13.327 for i in range(alo, ahi):
2025-07-01 05:49:13.334 ai = a[i]
2025-07-01 05:49:13.342 if ai == bj:
2025-07-01 05:49:13.348 if eqi is None:
2025-07-01 05:49:13.353 eqi, eqj = i, j
2025-07-01 05:49:13.357 continue
2025-07-01 05:49:13.362 cruncher.set_seq1(ai)
2025-07-01 05:49:13.366 # computing similarity is expensive, so use the quick
2025-07-01 05:49:13.370 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:13.375 # compares by a factor of 3.
2025-07-01 05:49:13.379 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:13.384 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:13.389 # of the computation is cached by cruncher
2025-07-01 05:49:13.394 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:13.400 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:13.404 cruncher.ratio() > best_ratio:
2025-07-01 05:49:13.409 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:13.413 if best_ratio < cutoff:
2025-07-01 05:49:13.418 # no non-identical "pretty close" pair
2025-07-01 05:49:13.422 if eqi is None:
2025-07-01 05:49:13.426 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:13.431 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:13.436 return
2025-07-01 05:49:13.442 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:13.453 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:13.464 else:
2025-07-01 05:49:13.474 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:13.483 eqi = None
2025-07-01 05:49:13.495
2025-07-01 05:49:13.506 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:13.519 # identical
2025-07-01 05:49:13.530
2025-07-01 05:49:13.539 # pump out diffs from before the synch point
2025-07-01 05:49:13.550 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:13.559
2025-07-01 05:49:13.567 # do intraline marking on the synch pair
2025-07-01 05:49:13.574 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:13.581 if eqi is None:
2025-07-01 05:49:13.587 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:13.592 atags = btags = ""
2025-07-01 05:49:13.597 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:13.601 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:13.606 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:13.610 if tag == 'replace':
2025-07-01 05:49:13.616 atags += '^' * la
2025-07-01 05:49:13.621 btags += '^' * lb
2025-07-01 05:49:13.627 elif tag == 'delete':
2025-07-01 05:49:13.632 atags += '-' * la
2025-07-01 05:49:13.637 elif tag == 'insert':
2025-07-01 05:49:13.643 btags += '+' * lb
2025-07-01 05:49:13.649 elif tag == 'equal':
2025-07-01 05:49:13.655 atags += ' ' * la
2025-07-01 05:49:13.660 btags += ' ' * lb
2025-07-01 05:49:13.664 else:
2025-07-01 05:49:13.669 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:13.675 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:13.681 else:
2025-07-01 05:49:13.688 # the synch pair is identical
2025-07-01 05:49:13.694 yield ' ' + aelt
2025-07-01 05:49:13.705
2025-07-01 05:49:13.716 # pump out diffs from after the synch point
2025-07-01 05:49:13.727 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:13.734
2025-07-01 05:49:13.739 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:13.746 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:13.751
2025-07-01 05:49:13.759 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:13.769 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:13.777 alo = 353, ahi = 1101
2025-07-01 05:49:13.784 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:13.791 blo = 353, bhi = 1101
2025-07-01 05:49:13.797
2025-07-01 05:49:13.803 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:13.809 g = []
2025-07-01 05:49:13.815 if alo < ahi:
2025-07-01 05:49:13.820 if blo < bhi:
2025-07-01 05:49:13.829 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:13.839 else:
2025-07-01 05:49:13.851 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:13.861 elif blo < bhi:
2025-07-01 05:49:13.869 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:13.876
2025-07-01 05:49:13.882 > yield from g
2025-07-01 05:49:13.894
2025-07-01 05:49:13.905 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:13.916 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:13.924
2025-07-01 05:49:13.936 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:13.944 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:13.951 alo = 353, ahi = 1101
2025-07-01 05:49:13.959 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:13.964 blo = 353, bhi = 1101
2025-07-01 05:49:13.970
2025-07-01 05:49:13.977 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:13.983 r"""
2025-07-01 05:49:13.989 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:14.000 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:14.010 synch point, and intraline difference marking is done on the
2025-07-01 05:49:14.017 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:14.027
2025-07-01 05:49:14.034 Example:
2025-07-01 05:49:14.044
2025-07-01 05:49:14.053 >>> d = Differ()
2025-07-01 05:49:14.063 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:14.072 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:14.079 >>> print(''.join(results), end="")
2025-07-01 05:49:14.087 - abcDefghiJkl
2025-07-01 05:49:14.102 + abcdefGhijkl
2025-07-01 05:49:14.119 """
2025-07-01 05:49:14.126
2025-07-01 05:49:14.135 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:14.147 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:14.155 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:14.161 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:14.167 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:14.173
2025-07-01 05:49:14.178 # search for the pair that matches best without being identical
2025-07-01 05:49:14.183 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:14.188 # on junk -- unless we have to)
2025-07-01 05:49:14.195 for j in range(blo, bhi):
2025-07-01 05:49:14.201 bj = b[j]
2025-07-01 05:49:14.206 cruncher.set_seq2(bj)
2025-07-01 05:49:14.212 for i in range(alo, ahi):
2025-07-01 05:49:14.218 ai = a[i]
2025-07-01 05:49:14.226 if ai == bj:
2025-07-01 05:49:14.233 if eqi is None:
2025-07-01 05:49:14.241 eqi, eqj = i, j
2025-07-01 05:49:14.248 continue
2025-07-01 05:49:14.256 cruncher.set_seq1(ai)
2025-07-01 05:49:14.265 # computing similarity is expensive, so use the quick
2025-07-01 05:49:14.277 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:14.287 # compares by a factor of 3.
2025-07-01 05:49:14.298 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:14.307 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:14.316 # of the computation is cached by cruncher
2025-07-01 05:49:14.324 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:14.330 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:14.337 cruncher.ratio() > best_ratio:
2025-07-01 05:49:14.344 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:14.350 if best_ratio < cutoff:
2025-07-01 05:49:14.362 # no non-identical "pretty close" pair
2025-07-01 05:49:14.374 if eqi is None:
2025-07-01 05:49:14.387 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:14.395 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:14.405 return
2025-07-01 05:49:14.415 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:14.424 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:14.431 else:
2025-07-01 05:49:14.437 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:14.443 eqi = None
2025-07-01 05:49:14.451
2025-07-01 05:49:14.462 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:14.470 # identical
2025-07-01 05:49:14.478
2025-07-01 05:49:14.488 # pump out diffs from before the synch point
2025-07-01 05:49:14.499 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:14.512
2025-07-01 05:49:14.524 # do intraline marking on the synch pair
2025-07-01 05:49:14.535 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:14.544 if eqi is None:
2025-07-01 05:49:14.554 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:14.566 atags = btags = ""
2025-07-01 05:49:14.577 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:14.592 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:14.605 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:14.618 if tag == 'replace':
2025-07-01 05:49:14.630 atags += '^' * la
2025-07-01 05:49:14.640 btags += '^' * lb
2025-07-01 05:49:14.651 elif tag == 'delete':
2025-07-01 05:49:14.659 atags += '-' * la
2025-07-01 05:49:14.667 elif tag == 'insert':
2025-07-01 05:49:14.674 btags += '+' * lb
2025-07-01 05:49:14.686 elif tag == 'equal':
2025-07-01 05:49:14.698 atags += ' ' * la
2025-07-01 05:49:14.709 btags += ' ' * lb
2025-07-01 05:49:14.721 else:
2025-07-01 05:49:14.733 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:14.746 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:14.756 else:
2025-07-01 05:49:14.767 # the synch pair is identical
2025-07-01 05:49:14.776 yield ' ' + aelt
2025-07-01 05:49:14.785
2025-07-01 05:49:14.797 # pump out diffs from after the synch point
2025-07-01 05:49:14.808 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:14.817
2025-07-01 05:49:14.824 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:14.831 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:14.839
2025-07-01 05:49:14.850 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:14.858 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:14.866 alo = 356, ahi = 1101
2025-07-01 05:49:14.876 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:14.883 blo = 356, bhi = 1101
2025-07-01 05:49:14.890
2025-07-01 05:49:14.903 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:14.914 g = []
2025-07-01 05:49:14.925 if alo < ahi:
2025-07-01 05:49:14.937 if blo < bhi:
2025-07-01 05:49:14.949 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:14.965 else:
2025-07-01 05:49:14.974 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:14.982 elif blo < bhi:
2025-07-01 05:49:14.992 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:15.009
2025-07-01 05:49:15.021 > yield from g
2025-07-01 05:49:15.028
2025-07-01 05:49:15.035 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:15.042 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:15.049
2025-07-01 05:49:15.056 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:15.062 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:15.068 alo = 356, ahi = 1101
2025-07-01 05:49:15.075 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:15.082 blo = 356, bhi = 1101
2025-07-01 05:49:15.087
2025-07-01 05:49:15.093 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:15.099 r"""
2025-07-01 05:49:15.104 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:15.110 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:15.116 synch point, and intraline difference marking is done on the
2025-07-01 05:49:15.123 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:15.129
2025-07-01 05:49:15.136 Example:
2025-07-01 05:49:15.143
2025-07-01 05:49:15.149 >>> d = Differ()
2025-07-01 05:49:15.156 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:15.164 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:15.173 >>> print(''.join(results), end="")
2025-07-01 05:49:15.185 - abcDefghiJkl
2025-07-01 05:49:15.204 + abcdefGhijkl
2025-07-01 05:49:15.217 """
2025-07-01 05:49:15.223
2025-07-01 05:49:15.229 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:15.235 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:15.241 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:15.246 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:15.256 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:15.266
2025-07-01 05:49:15.273 # search for the pair that matches best without being identical
2025-07-01 05:49:15.282 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:15.290 # on junk -- unless we have to)
2025-07-01 05:49:15.297 for j in range(blo, bhi):
2025-07-01 05:49:15.303 bj = b[j]
2025-07-01 05:49:15.309 cruncher.set_seq2(bj)
2025-07-01 05:49:15.316 for i in range(alo, ahi):
2025-07-01 05:49:15.323 ai = a[i]
2025-07-01 05:49:15.329 if ai == bj:
2025-07-01 05:49:15.335 if eqi is None:
2025-07-01 05:49:15.340 eqi, eqj = i, j
2025-07-01 05:49:15.346 continue
2025-07-01 05:49:15.351 cruncher.set_seq1(ai)
2025-07-01 05:49:15.357 # computing similarity is expensive, so use the quick
2025-07-01 05:49:15.363 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:15.370 # compares by a factor of 3.
2025-07-01 05:49:15.377 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:15.384 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:15.394 # of the computation is cached by cruncher
2025-07-01 05:49:15.399 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:15.404 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:15.410 cruncher.ratio() > best_ratio:
2025-07-01 05:49:15.415 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:15.421 if best_ratio < cutoff:
2025-07-01 05:49:15.426 # no non-identical "pretty close" pair
2025-07-01 05:49:15.434 if eqi is None:
2025-07-01 05:49:15.441 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:15.448 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:15.454 return
2025-07-01 05:49:15.460 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:15.465 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:15.471 else:
2025-07-01 05:49:15.477 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:15.486 eqi = None
2025-07-01 05:49:15.492
2025-07-01 05:49:15.498 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:15.503 # identical
2025-07-01 05:49:15.508
2025-07-01 05:49:15.514 # pump out diffs from before the synch point
2025-07-01 05:49:15.521 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:15.529
2025-07-01 05:49:15.536 # do intraline marking on the synch pair
2025-07-01 05:49:15.542 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:15.547 if eqi is None:
2025-07-01 05:49:15.553 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:15.558 atags = btags = ""
2025-07-01 05:49:15.564 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:15.570 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:15.578 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:15.584 if tag == 'replace':
2025-07-01 05:49:15.590 atags += '^' * la
2025-07-01 05:49:15.595 btags += '^' * lb
2025-07-01 05:49:15.600 elif tag == 'delete':
2025-07-01 05:49:15.605 atags += '-' * la
2025-07-01 05:49:15.609 elif tag == 'insert':
2025-07-01 05:49:15.615 btags += '+' * lb
2025-07-01 05:49:15.621 elif tag == 'equal':
2025-07-01 05:49:15.626 atags += ' ' * la
2025-07-01 05:49:15.634 btags += ' ' * lb
2025-07-01 05:49:15.641 else:
2025-07-01 05:49:15.647 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:15.653 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:15.658 else:
2025-07-01 05:49:15.663 # the synch pair is identical
2025-07-01 05:49:15.668 yield ' ' + aelt
2025-07-01 05:49:15.673
2025-07-01 05:49:15.677 # pump out diffs from after the synch point
2025-07-01 05:49:15.682 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:15.686
2025-07-01 05:49:15.692 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:15.697 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:15.704
2025-07-01 05:49:15.711 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:15.719 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:15.727 alo = 357, ahi = 1101
2025-07-01 05:49:15.739 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:15.750 blo = 357, bhi = 1101
2025-07-01 05:49:15.758
2025-07-01 05:49:15.765 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:15.770 g = []
2025-07-01 05:49:15.777 if alo < ahi:
2025-07-01 05:49:15.783 if blo < bhi:
2025-07-01 05:49:15.790 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:15.797 else:
2025-07-01 05:49:15.808 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:15.817 elif blo < bhi:
2025-07-01 05:49:15.824 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:15.829
2025-07-01 05:49:15.834 > yield from g
2025-07-01 05:49:15.839
2025-07-01 05:49:15.844 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:15.850 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:15.855
2025-07-01 05:49:15.861 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:15.870 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:15.877 alo = 357, ahi = 1101
2025-07-01 05:49:15.885 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:15.893 blo = 357, bhi = 1101
2025-07-01 05:49:15.901
2025-07-01 05:49:15.906 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:15.912 r"""
2025-07-01 05:49:15.917 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:15.923 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:15.929 synch point, and intraline difference marking is done on the
2025-07-01 05:49:15.935 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:15.941
2025-07-01 05:49:15.948 Example:
2025-07-01 05:49:15.954
2025-07-01 05:49:15.961 >>> d = Differ()
2025-07-01 05:49:15.967 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:15.974 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:15.985 >>> print(''.join(results), end="")
2025-07-01 05:49:15.996 - abcDefghiJkl
2025-07-01 05:49:16.013 + abcdefGhijkl
2025-07-01 05:49:16.025 """
2025-07-01 05:49:16.035
2025-07-01 05:49:16.042 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:16.049 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:16.054 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:16.061 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:16.068 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:16.075
2025-07-01 05:49:16.082 # search for the pair that matches best without being identical
2025-07-01 05:49:16.089 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:16.096 # on junk -- unless we have to)
2025-07-01 05:49:16.102 for j in range(blo, bhi):
2025-07-01 05:49:16.109 bj = b[j]
2025-07-01 05:49:16.116 cruncher.set_seq2(bj)
2025-07-01 05:49:16.122 for i in range(alo, ahi):
2025-07-01 05:49:16.129 ai = a[i]
2025-07-01 05:49:16.135 if ai == bj:
2025-07-01 05:49:16.142 if eqi is None:
2025-07-01 05:49:16.149 eqi, eqj = i, j
2025-07-01 05:49:16.155 continue
2025-07-01 05:49:16.162 cruncher.set_seq1(ai)
2025-07-01 05:49:16.169 # computing similarity is expensive, so use the quick
2025-07-01 05:49:16.176 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:16.182 # compares by a factor of 3.
2025-07-01 05:49:16.189 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:16.196 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:16.203 # of the computation is cached by cruncher
2025-07-01 05:49:16.210 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:16.219 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:16.229 cruncher.ratio() > best_ratio:
2025-07-01 05:49:16.237 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:16.244 if best_ratio < cutoff:
2025-07-01 05:49:16.249 # no non-identical "pretty close" pair
2025-07-01 05:49:16.255 if eqi is None:
2025-07-01 05:49:16.261 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:16.267 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:16.274 return
2025-07-01 05:49:16.282 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:16.289 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:16.296 else:
2025-07-01 05:49:16.303 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:16.308 eqi = None
2025-07-01 05:49:16.314
2025-07-01 05:49:16.322 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:16.328 # identical
2025-07-01 05:49:16.334
2025-07-01 05:49:16.340 # pump out diffs from before the synch point
2025-07-01 05:49:16.345 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:16.351
2025-07-01 05:49:16.357 # do intraline marking on the synch pair
2025-07-01 05:49:16.363 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:16.369 if eqi is None:
2025-07-01 05:49:16.375 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:16.381 atags = btags = ""
2025-07-01 05:49:16.388 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:16.395 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:16.400 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:16.405 if tag == 'replace':
2025-07-01 05:49:16.409 atags += '^' * la
2025-07-01 05:49:16.414 btags += '^' * lb
2025-07-01 05:49:16.419 elif tag == 'delete':
2025-07-01 05:49:16.423 atags += '-' * la
2025-07-01 05:49:16.428 elif tag == 'insert':
2025-07-01 05:49:16.433 btags += '+' * lb
2025-07-01 05:49:16.438 elif tag == 'equal':
2025-07-01 05:49:16.443 atags += ' ' * la
2025-07-01 05:49:16.449 btags += ' ' * lb
2025-07-01 05:49:16.453 else:
2025-07-01 05:49:16.458 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:16.463 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:16.468 else:
2025-07-01 05:49:16.474 # the synch pair is identical
2025-07-01 05:49:16.484 yield ' ' + aelt
2025-07-01 05:49:16.492
2025-07-01 05:49:16.499 # pump out diffs from after the synch point
2025-07-01 05:49:16.505 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:16.511
2025-07-01 05:49:16.517 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:16.524 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:16.530
2025-07-01 05:49:16.541 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:16.550 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:16.560 alo = 358, ahi = 1101
2025-07-01 05:49:16.571 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:16.580 blo = 358, bhi = 1101
2025-07-01 05:49:16.586
2025-07-01 05:49:16.592 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:16.598 g = []
2025-07-01 05:49:16.604 if alo < ahi:
2025-07-01 05:49:16.610 if blo < bhi:
2025-07-01 05:49:16.616 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:16.622 else:
2025-07-01 05:49:16.628 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:16.634 elif blo < bhi:
2025-07-01 05:49:16.640 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:16.647
2025-07-01 05:49:16.653 > yield from g
2025-07-01 05:49:16.658
2025-07-01 05:49:16.663 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:16.668 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:16.673
2025-07-01 05:49:16.677 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:16.683 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:16.688 alo = 358, ahi = 1101
2025-07-01 05:49:16.694 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:16.699 blo = 358, bhi = 1101
2025-07-01 05:49:16.705
2025-07-01 05:49:16.711 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:16.719 r"""
2025-07-01 05:49:16.727 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:16.734 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:16.740 synch point, and intraline difference marking is done on the
2025-07-01 05:49:16.746 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:16.751
2025-07-01 05:49:16.758 Example:
2025-07-01 05:49:16.768
2025-07-01 05:49:16.777 >>> d = Differ()
2025-07-01 05:49:16.784 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:16.790 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:16.801 >>> print(''.join(results), end="")
2025-07-01 05:49:16.807 - abcDefghiJkl
2025-07-01 05:49:16.820 + abcdefGhijkl
2025-07-01 05:49:16.840 """
2025-07-01 05:49:16.847
2025-07-01 05:49:16.857 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:16.869 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:16.880 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:16.890 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:16.900 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:16.912
2025-07-01 05:49:16.921 # search for the pair that matches best without being identical
2025-07-01 05:49:16.929 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:16.936 # on junk -- unless we have to)
2025-07-01 05:49:16.942 for j in range(blo, bhi):
2025-07-01 05:49:16.948 bj = b[j]
2025-07-01 05:49:16.954 cruncher.set_seq2(bj)
2025-07-01 05:49:16.960 for i in range(alo, ahi):
2025-07-01 05:49:16.965 ai = a[i]
2025-07-01 05:49:16.971 if ai == bj:
2025-07-01 05:49:16.977 if eqi is None:
2025-07-01 05:49:16.986 eqi, eqj = i, j
2025-07-01 05:49:16.996 continue
2025-07-01 05:49:17.005 cruncher.set_seq1(ai)
2025-07-01 05:49:17.011 # computing similarity is expensive, so use the quick
2025-07-01 05:49:17.017 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:17.022 # compares by a factor of 3.
2025-07-01 05:49:17.027 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:17.033 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:17.038 # of the computation is cached by cruncher
2025-07-01 05:49:17.044 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:17.050 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:17.055 cruncher.ratio() > best_ratio:
2025-07-01 05:49:17.060 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:17.066 if best_ratio < cutoff:
2025-07-01 05:49:17.077 # no non-identical "pretty close" pair
2025-07-01 05:49:17.085 if eqi is None:
2025-07-01 05:49:17.090 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:17.102 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:17.112 return
2025-07-01 05:49:17.119 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:17.127 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:17.138 else:
2025-07-01 05:49:17.146 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:17.151 eqi = None
2025-07-01 05:49:17.156
2025-07-01 05:49:17.161 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:17.165 # identical
2025-07-01 05:49:17.170
2025-07-01 05:49:17.174 # pump out diffs from before the synch point
2025-07-01 05:49:17.178 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:17.183
2025-07-01 05:49:17.189 # do intraline marking on the synch pair
2025-07-01 05:49:17.194 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:17.200 if eqi is None:
2025-07-01 05:49:17.206 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:17.211 atags = btags = ""
2025-07-01 05:49:17.216 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:17.221 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:17.227 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:17.233 if tag == 'replace':
2025-07-01 05:49:17.239 atags += '^' * la
2025-07-01 05:49:17.245 btags += '^' * lb
2025-07-01 05:49:17.251 elif tag == 'delete':
2025-07-01 05:49:17.257 atags += '-' * la
2025-07-01 05:49:17.267 elif tag == 'insert':
2025-07-01 05:49:17.280 btags += '+' * lb
2025-07-01 05:49:17.291 elif tag == 'equal':
2025-07-01 05:49:17.299 atags += ' ' * la
2025-07-01 05:49:17.307 btags += ' ' * lb
2025-07-01 05:49:17.315 else:
2025-07-01 05:49:17.325 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:17.333 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:17.340 else:
2025-07-01 05:49:17.347 # the synch pair is identical
2025-07-01 05:49:17.355 yield ' ' + aelt
2025-07-01 05:49:17.368
2025-07-01 05:49:17.379 # pump out diffs from after the synch point
2025-07-01 05:49:17.388 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:17.395
2025-07-01 05:49:17.401 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:17.408 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:17.414
2025-07-01 05:49:17.421 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:17.429 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:17.435 alo = 359, ahi = 1101
2025-07-01 05:49:17.441 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:17.447 blo = 359, bhi = 1101
2025-07-01 05:49:17.453
2025-07-01 05:49:17.458 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:17.464 g = []
2025-07-01 05:49:17.470 if alo < ahi:
2025-07-01 05:49:17.481 if blo < bhi:
2025-07-01 05:49:17.490 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:17.496 else:
2025-07-01 05:49:17.502 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:17.508 elif blo < bhi:
2025-07-01 05:49:17.513 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:17.519
2025-07-01 05:49:17.525 > yield from g
2025-07-01 05:49:17.531
2025-07-01 05:49:17.543 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:17.553 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:17.560
2025-07-01 05:49:17.565 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:17.570 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:17.575 alo = 359, ahi = 1101
2025-07-01 05:49:17.581 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:17.585 blo = 359, bhi = 1101
2025-07-01 05:49:17.590
2025-07-01 05:49:17.594 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:17.599 r"""
2025-07-01 05:49:17.604 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:17.609 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:17.614 synch point, and intraline difference marking is done on the
2025-07-01 05:49:17.619 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:17.625
2025-07-01 05:49:17.631 Example:
2025-07-01 05:49:17.637
2025-07-01 05:49:17.644 >>> d = Differ()
2025-07-01 05:49:17.651 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:17.657 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:17.662 >>> print(''.join(results), end="")
2025-07-01 05:49:17.668 - abcDefghiJkl
2025-07-01 05:49:17.688 + abcdefGhijkl
2025-07-01 05:49:17.708 """
2025-07-01 05:49:17.714
2025-07-01 05:49:17.722 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:17.734 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:17.744 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:17.753 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:17.766 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:17.774
2025-07-01 05:49:17.781 # search for the pair that matches best without being identical
2025-07-01 05:49:17.787 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:17.793 # on junk -- unless we have to)
2025-07-01 05:49:17.798 for j in range(blo, bhi):
2025-07-01 05:49:17.806 bj = b[j]
2025-07-01 05:49:17.812 cruncher.set_seq2(bj)
2025-07-01 05:49:17.818 for i in range(alo, ahi):
2025-07-01 05:49:17.828 ai = a[i]
2025-07-01 05:49:17.837 if ai == bj:
2025-07-01 05:49:17.848 if eqi is None:
2025-07-01 05:49:17.860 eqi, eqj = i, j
2025-07-01 05:49:17.869 continue
2025-07-01 05:49:17.879 cruncher.set_seq1(ai)
2025-07-01 05:49:17.890 # computing similarity is expensive, so use the quick
2025-07-01 05:49:17.899 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:17.907 # compares by a factor of 3.
2025-07-01 05:49:17.915 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:17.926 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:17.935 # of the computation is cached by cruncher
2025-07-01 05:49:17.944 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:17.955 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:17.963 cruncher.ratio() > best_ratio:
2025-07-01 05:49:17.971 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:17.977 if best_ratio < cutoff:
2025-07-01 05:49:17.982 # no non-identical "pretty close" pair
2025-07-01 05:49:17.988 if eqi is None:
2025-07-01 05:49:17.993 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:17.998 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:18.003 return
2025-07-01 05:49:18.008 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:18.013 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:18.017 else:
2025-07-01 05:49:18.025 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:18.031 eqi = None
2025-07-01 05:49:18.036
2025-07-01 05:49:18.042 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:18.048 # identical
2025-07-01 05:49:18.055
2025-07-01 05:49:18.061 # pump out diffs from before the synch point
2025-07-01 05:49:18.071 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:18.081
2025-07-01 05:49:18.090 # do intraline marking on the synch pair
2025-07-01 05:49:18.097 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:18.103 if eqi is None:
2025-07-01 05:49:18.108 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:18.113 atags = btags = ""
2025-07-01 05:49:18.119 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:18.124 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:18.129 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:18.136 if tag == 'replace':
2025-07-01 05:49:18.142 atags += '^' * la
2025-07-01 05:49:18.149 btags += '^' * lb
2025-07-01 05:49:18.155 elif tag == 'delete':
2025-07-01 05:49:18.160 atags += '-' * la
2025-07-01 05:49:18.166 elif tag == 'insert':
2025-07-01 05:49:18.177 btags += '+' * lb
2025-07-01 05:49:18.186 elif tag == 'equal':
2025-07-01 05:49:18.194 atags += ' ' * la
2025-07-01 05:49:18.201 btags += ' ' * lb
2025-07-01 05:49:18.207 else:
2025-07-01 05:49:18.213 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:18.220 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:18.227 else:
2025-07-01 05:49:18.234 # the synch pair is identical
2025-07-01 05:49:18.245 yield ' ' + aelt
2025-07-01 05:49:18.256
2025-07-01 05:49:18.264 # pump out diffs from after the synch point
2025-07-01 05:49:18.275 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:18.282
2025-07-01 05:49:18.289 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:18.297 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:18.310
2025-07-01 05:49:18.319 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:18.329 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:18.337 alo = 360, ahi = 1101
2025-07-01 05:49:18.346 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:18.354 blo = 360, bhi = 1101
2025-07-01 05:49:18.363
2025-07-01 05:49:18.371 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:18.376 g = []
2025-07-01 05:49:18.382 if alo < ahi:
2025-07-01 05:49:18.387 if blo < bhi:
2025-07-01 05:49:18.394 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:18.400 else:
2025-07-01 05:49:18.408 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:18.415 elif blo < bhi:
2025-07-01 05:49:18.422 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:18.432
2025-07-01 05:49:18.441 > yield from g
2025-07-01 05:49:18.450
2025-07-01 05:49:18.457 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:18.463 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:18.469
2025-07-01 05:49:18.474 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:18.481 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:18.487 alo = 360, ahi = 1101
2025-07-01 05:49:18.495 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:18.502 blo = 360, bhi = 1101
2025-07-01 05:49:18.508
2025-07-01 05:49:18.514 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:18.522 r"""
2025-07-01 05:49:18.530 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:18.539 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:18.546 synch point, and intraline difference marking is done on the
2025-07-01 05:49:18.552 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:18.558
2025-07-01 05:49:18.563 Example:
2025-07-01 05:49:18.574
2025-07-01 05:49:18.582 >>> d = Differ()
2025-07-01 05:49:18.591 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:18.600 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:18.606 >>> print(''.join(results), end="")
2025-07-01 05:49:18.612 - abcDefghiJkl
2025-07-01 05:49:18.622 + abcdefGhijkl
2025-07-01 05:49:18.631 """
2025-07-01 05:49:18.637
2025-07-01 05:49:18.645 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:18.655 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:18.667 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:18.677 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:18.686 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:18.696
2025-07-01 05:49:18.708 # search for the pair that matches best without being identical
2025-07-01 05:49:18.719 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:18.729 # on junk -- unless we have to)
2025-07-01 05:49:18.739 for j in range(blo, bhi):
2025-07-01 05:49:18.750 bj = b[j]
2025-07-01 05:49:18.759 cruncher.set_seq2(bj)
2025-07-01 05:49:18.767 for i in range(alo, ahi):
2025-07-01 05:49:18.776 ai = a[i]
2025-07-01 05:49:18.788 if ai == bj:
2025-07-01 05:49:18.799 if eqi is None:
2025-07-01 05:49:18.810 eqi, eqj = i, j
2025-07-01 05:49:18.820 continue
2025-07-01 05:49:18.827 cruncher.set_seq1(ai)
2025-07-01 05:49:18.833 # computing similarity is expensive, so use the quick
2025-07-01 05:49:18.841 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:18.848 # compares by a factor of 3.
2025-07-01 05:49:18.854 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:18.860 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:18.869 # of the computation is cached by cruncher
2025-07-01 05:49:18.881 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:18.890 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:18.900 cruncher.ratio() > best_ratio:
2025-07-01 05:49:18.911 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:18.921 if best_ratio < cutoff:
2025-07-01 05:49:18.932 # no non-identical "pretty close" pair
2025-07-01 05:49:18.943 if eqi is None:
2025-07-01 05:49:18.952 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:18.960 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:18.966 return
2025-07-01 05:49:18.973 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:18.979 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:18.985 else:
2025-07-01 05:49:18.991 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:18.997 eqi = None
2025-07-01 05:49:19.002
2025-07-01 05:49:19.013 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:19.022 # identical
2025-07-01 05:49:19.029
2025-07-01 05:49:19.037 # pump out diffs from before the synch point
2025-07-01 05:49:19.049 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:19.059
2025-07-01 05:49:19.068 # do intraline marking on the synch pair
2025-07-01 05:49:19.077 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:19.086 if eqi is None:
2025-07-01 05:49:19.095 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:19.102 atags = btags = ""
2025-07-01 05:49:19.111 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:19.116 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:19.123 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:19.131 if tag == 'replace':
2025-07-01 05:49:19.143 atags += '^' * la
2025-07-01 05:49:19.153 btags += '^' * lb
2025-07-01 05:49:19.161 elif tag == 'delete':
2025-07-01 05:49:19.168 atags += '-' * la
2025-07-01 05:49:19.174 elif tag == 'insert':
2025-07-01 05:49:19.184 btags += '+' * lb
2025-07-01 05:49:19.195 elif tag == 'equal':
2025-07-01 05:49:19.207 atags += ' ' * la
2025-07-01 05:49:19.218 btags += ' ' * lb
2025-07-01 05:49:19.231 else:
2025-07-01 05:49:19.240 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:19.253 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:19.263 else:
2025-07-01 05:49:19.270 # the synch pair is identical
2025-07-01 05:49:19.277 yield ' ' + aelt
2025-07-01 05:49:19.283
2025-07-01 05:49:19.290 # pump out diffs from after the synch point
2025-07-01 05:49:19.299 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:19.311
2025-07-01 05:49:19.319 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:19.326 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:19.331
2025-07-01 05:49:19.336 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:19.341 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:19.345 alo = 361, ahi = 1101
2025-07-01 05:49:19.350 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:19.355 blo = 361, bhi = 1101
2025-07-01 05:49:19.359
2025-07-01 05:49:19.364 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:19.368 g = []
2025-07-01 05:49:19.373 if alo < ahi:
2025-07-01 05:49:19.377 if blo < bhi:
2025-07-01 05:49:19.381 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:19.386 else:
2025-07-01 05:49:19.390 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:19.394 elif blo < bhi:
2025-07-01 05:49:19.399 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:19.403
2025-07-01 05:49:19.414 > yield from g
2025-07-01 05:49:19.422
2025-07-01 05:49:19.429 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:19.435 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:19.443
2025-07-01 05:49:19.454 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:19.462 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:19.468 alo = 361, ahi = 1101
2025-07-01 05:49:19.474 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:19.479 blo = 361, bhi = 1101
2025-07-01 05:49:19.484
2025-07-01 05:49:19.489 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:19.493 r"""
2025-07-01 05:49:19.498 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:19.503 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:19.508 synch point, and intraline difference marking is done on the
2025-07-01 05:49:19.514 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:19.518
2025-07-01 05:49:19.523 Example:
2025-07-01 05:49:19.530
2025-07-01 05:49:19.536 >>> d = Differ()
2025-07-01 05:49:19.542 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:19.549 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:19.556 >>> print(''.join(results), end="")
2025-07-01 05:49:19.563 - abcDefghiJkl
2025-07-01 05:49:19.577 + abcdefGhijkl
2025-07-01 05:49:19.590 """
2025-07-01 05:49:19.599
2025-07-01 05:49:19.607 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:19.614 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:19.619 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:19.624 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:19.628 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:19.633
2025-07-01 05:49:19.639 # search for the pair that matches best without being identical
2025-07-01 05:49:19.644 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:19.650 # on junk -- unless we have to)
2025-07-01 05:49:19.657 for j in range(blo, bhi):
2025-07-01 05:49:19.664 bj = b[j]
2025-07-01 05:49:19.670 cruncher.set_seq2(bj)
2025-07-01 05:49:19.676 for i in range(alo, ahi):
2025-07-01 05:49:19.680 ai = a[i]
2025-07-01 05:49:19.685 if ai == bj:
2025-07-01 05:49:19.690 if eqi is None:
2025-07-01 05:49:19.694 eqi, eqj = i, j
2025-07-01 05:49:19.699 continue
2025-07-01 05:49:19.704 cruncher.set_seq1(ai)
2025-07-01 05:49:19.709 # computing similarity is expensive, so use the quick
2025-07-01 05:49:19.714 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:19.720 # compares by a factor of 3.
2025-07-01 05:49:19.725 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:19.731 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:19.737 # of the computation is cached by cruncher
2025-07-01 05:49:19.745 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:19.751 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:19.758 cruncher.ratio() > best_ratio:
2025-07-01 05:49:19.768 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:19.778 if best_ratio < cutoff:
2025-07-01 05:49:19.786 # no non-identical "pretty close" pair
2025-07-01 05:49:19.792 if eqi is None:
2025-07-01 05:49:19.799 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:19.807 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:19.815 return
2025-07-01 05:49:19.827 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:19.836 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:19.843 else:
2025-07-01 05:49:19.849 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:19.854 eqi = None
2025-07-01 05:49:19.859
2025-07-01 05:49:19.864 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:19.869 # identical
2025-07-01 05:49:19.874
2025-07-01 05:49:19.878 # pump out diffs from before the synch point
2025-07-01 05:49:19.883 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:19.888
2025-07-01 05:49:19.894 # do intraline marking on the synch pair
2025-07-01 05:49:19.899 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:19.905 if eqi is None:
2025-07-01 05:49:19.916 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:19.925 atags = btags = ""
2025-07-01 05:49:19.933 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:19.939 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:19.944 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:19.950 if tag == 'replace':
2025-07-01 05:49:19.956 atags += '^' * la
2025-07-01 05:49:19.962 btags += '^' * lb
2025-07-01 05:49:19.971 elif tag == 'delete':
2025-07-01 05:49:19.980 atags += '-' * la
2025-07-01 05:49:19.988 elif tag == 'insert':
2025-07-01 05:49:19.993 btags += '+' * lb
2025-07-01 05:49:19.999 elif tag == 'equal':
2025-07-01 05:49:20.005 atags += ' ' * la
2025-07-01 05:49:20.011 btags += ' ' * lb
2025-07-01 05:49:20.017 else:
2025-07-01 05:49:20.023 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:20.030 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:20.036 else:
2025-07-01 05:49:20.043 # the synch pair is identical
2025-07-01 05:49:20.049 yield ' ' + aelt
2025-07-01 05:49:20.055
2025-07-01 05:49:20.062 # pump out diffs from after the synch point
2025-07-01 05:49:20.072 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:20.083
2025-07-01 05:49:20.091 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:20.099 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:20.106
2025-07-01 05:49:20.113 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:20.121 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:20.127 alo = 362, ahi = 1101
2025-07-01 05:49:20.133 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:20.137 blo = 362, bhi = 1101
2025-07-01 05:49:20.143
2025-07-01 05:49:20.149 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:20.153 g = []
2025-07-01 05:49:20.158 if alo < ahi:
2025-07-01 05:49:20.162 if blo < bhi:
2025-07-01 05:49:20.168 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:20.173 else:
2025-07-01 05:49:20.179 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:20.185 elif blo < bhi:
2025-07-01 05:49:20.192 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:20.199
2025-07-01 05:49:20.205 > yield from g
2025-07-01 05:49:20.212
2025-07-01 05:49:20.219 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:20.226 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:20.232
2025-07-01 05:49:20.239 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:20.246 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:20.259 alo = 362, ahi = 1101
2025-07-01 05:49:20.269 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:20.275 blo = 362, bhi = 1101
2025-07-01 05:49:20.280
2025-07-01 05:49:20.286 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:20.292 r"""
2025-07-01 05:49:20.298 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:20.307 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:20.314 synch point, and intraline difference marking is done on the
2025-07-01 05:49:20.321 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:20.327
2025-07-01 05:49:20.332 Example:
2025-07-01 05:49:20.337
2025-07-01 05:49:20.341 >>> d = Differ()
2025-07-01 05:49:20.346 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:20.351 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:20.355 >>> print(''.join(results), end="")
2025-07-01 05:49:20.361 - abcDefghiJkl
2025-07-01 05:49:20.370 + abcdefGhijkl
2025-07-01 05:49:20.384 """
2025-07-01 05:49:20.391
2025-07-01 05:49:20.398 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:20.405 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:20.412 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:20.419 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:20.435 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:20.445
2025-07-01 05:49:20.456 # search for the pair that matches best without being identical
2025-07-01 05:49:20.464 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:20.473 # on junk -- unless we have to)
2025-07-01 05:49:20.485 for j in range(blo, bhi):
2025-07-01 05:49:20.493 bj = b[j]
2025-07-01 05:49:20.500 cruncher.set_seq2(bj)
2025-07-01 05:49:20.507 for i in range(alo, ahi):
2025-07-01 05:49:20.515 ai = a[i]
2025-07-01 05:49:20.526 if ai == bj:
2025-07-01 05:49:20.534 if eqi is None:
2025-07-01 05:49:20.541 eqi, eqj = i, j
2025-07-01 05:49:20.547 continue
2025-07-01 05:49:20.555 cruncher.set_seq1(ai)
2025-07-01 05:49:20.568 # computing similarity is expensive, so use the quick
2025-07-01 05:49:20.577 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:20.585 # compares by a factor of 3.
2025-07-01 05:49:20.591 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:20.597 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:20.603 # of the computation is cached by cruncher
2025-07-01 05:49:20.611 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:20.621 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:20.634 cruncher.ratio() > best_ratio:
2025-07-01 05:49:20.645 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:20.653 if best_ratio < cutoff:
2025-07-01 05:49:20.660 # no non-identical "pretty close" pair
2025-07-01 05:49:20.667 if eqi is None:
2025-07-01 05:49:20.675 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:20.685 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:20.693 return
2025-07-01 05:49:20.700 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:20.707 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:20.719 else:
2025-07-01 05:49:20.729 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:20.740 eqi = None
2025-07-01 05:49:20.751
2025-07-01 05:49:20.760 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:20.768 # identical
2025-07-01 05:49:20.775
2025-07-01 05:49:20.787 # pump out diffs from before the synch point
2025-07-01 05:49:20.801 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:20.815
2025-07-01 05:49:20.823 # do intraline marking on the synch pair
2025-07-01 05:49:20.830 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:20.838 if eqi is None:
2025-07-01 05:49:20.845 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:20.851 atags = btags = ""
2025-07-01 05:49:20.855 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:20.860 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:20.865 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:20.870 if tag == 'replace':
2025-07-01 05:49:20.881 atags += '^' * la
2025-07-01 05:49:20.887 btags += '^' * lb
2025-07-01 05:49:20.893 elif tag == 'delete':
2025-07-01 05:49:20.899 atags += '-' * la
2025-07-01 05:49:20.904 elif tag == 'insert':
2025-07-01 05:49:20.911 btags += '+' * lb
2025-07-01 05:49:20.918 elif tag == 'equal':
2025-07-01 05:49:20.929 atags += ' ' * la
2025-07-01 05:49:20.940 btags += ' ' * lb
2025-07-01 05:49:20.950 else:
2025-07-01 05:49:20.956 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:20.962 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:20.969 else:
2025-07-01 05:49:20.976 # the synch pair is identical
2025-07-01 05:49:20.982 yield ' ' + aelt
2025-07-01 05:49:20.992
2025-07-01 05:49:21.003 # pump out diffs from after the synch point
2025-07-01 05:49:21.013 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:21.020
2025-07-01 05:49:21.027 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:21.033 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:21.038
2025-07-01 05:49:21.043 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:21.048 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:21.053 alo = 363, ahi = 1101
2025-07-01 05:49:21.058 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:21.063 blo = 363, bhi = 1101
2025-07-01 05:49:21.069
2025-07-01 05:49:21.075 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:21.082 g = []
2025-07-01 05:49:21.087 if alo < ahi:
2025-07-01 05:49:21.094 if blo < bhi:
2025-07-01 05:49:21.104 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:21.112 else:
2025-07-01 05:49:21.119 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:21.125 elif blo < bhi:
2025-07-01 05:49:21.130 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:21.136
2025-07-01 05:49:21.142 > yield from g
2025-07-01 05:49:21.151
2025-07-01 05:49:21.159 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:21.166 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:21.173
2025-07-01 05:49:21.178 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:21.185 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:21.191 alo = 363, ahi = 1101
2025-07-01 05:49:21.197 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:21.203 blo = 363, bhi = 1101
2025-07-01 05:49:21.213
2025-07-01 05:49:21.221 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:21.228 r"""
2025-07-01 05:49:21.235 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:21.240 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:21.246 synch point, and intraline difference marking is done on the
2025-07-01 05:49:21.251 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:21.258
2025-07-01 05:49:21.267 Example:
2025-07-01 05:49:21.275
2025-07-01 05:49:21.286 >>> d = Differ()
2025-07-01 05:49:21.295 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:21.301 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:21.307 >>> print(''.join(results), end="")
2025-07-01 05:49:21.320 - abcDefghiJkl
2025-07-01 05:49:21.344 + abcdefGhijkl
2025-07-01 05:49:21.366 """
2025-07-01 05:49:21.375
2025-07-01 05:49:21.386 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:21.398 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:21.410 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:21.421 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:21.428 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:21.439
2025-07-01 05:49:21.449 # search for the pair that matches best without being identical
2025-07-01 05:49:21.458 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:21.465 # on junk -- unless we have to)
2025-07-01 05:49:21.477 for j in range(blo, bhi):
2025-07-01 05:49:21.484 bj = b[j]
2025-07-01 05:49:21.491 cruncher.set_seq2(bj)
2025-07-01 05:49:21.498 for i in range(alo, ahi):
2025-07-01 05:49:21.504 ai = a[i]
2025-07-01 05:49:21.511 if ai == bj:
2025-07-01 05:49:21.521 if eqi is None:
2025-07-01 05:49:21.528 eqi, eqj = i, j
2025-07-01 05:49:21.535 continue
2025-07-01 05:49:21.544 cruncher.set_seq1(ai)
2025-07-01 05:49:21.553 # computing similarity is expensive, so use the quick
2025-07-01 05:49:21.564 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:21.574 # compares by a factor of 3.
2025-07-01 05:49:21.582 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:21.590 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:21.597 # of the computation is cached by cruncher
2025-07-01 05:49:21.608 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:21.620 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:21.629 cruncher.ratio() > best_ratio:
2025-07-01 05:49:21.642 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:21.650 if best_ratio < cutoff:
2025-07-01 05:49:21.660 # no non-identical "pretty close" pair
2025-07-01 05:49:21.667 if eqi is None:
2025-07-01 05:49:21.675 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:21.685 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:21.697 return
2025-07-01 05:49:21.708 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:21.717 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:21.724 else:
2025-07-01 05:49:21.730 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:21.735 eqi = None
2025-07-01 05:49:21.740
2025-07-01 05:49:21.746 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:21.756 # identical
2025-07-01 05:49:21.766
2025-07-01 05:49:21.774 # pump out diffs from before the synch point
2025-07-01 05:49:21.782 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:21.789
2025-07-01 05:49:21.796 # do intraline marking on the synch pair
2025-07-01 05:49:21.803 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:21.809 if eqi is None:
2025-07-01 05:49:21.814 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:21.831 atags = btags = ""
2025-07-01 05:49:21.843 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:21.853 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:21.865 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:21.875 if tag == 'replace':
2025-07-01 05:49:21.884 atags += '^' * la
2025-07-01 05:49:21.892 btags += '^' * lb
2025-07-01 05:49:21.898 elif tag == 'delete':
2025-07-01 05:49:21.909 atags += '-' * la
2025-07-01 05:49:21.920 elif tag == 'insert':
2025-07-01 05:49:21.929 btags += '+' * lb
2025-07-01 05:49:21.936 elif tag == 'equal':
2025-07-01 05:49:21.943 atags += ' ' * la
2025-07-01 05:49:21.951 btags += ' ' * lb
2025-07-01 05:49:21.961 else:
2025-07-01 05:49:21.969 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:21.976 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:21.982 else:
2025-07-01 05:49:21.993 # the synch pair is identical
2025-07-01 05:49:22.004 yield ' ' + aelt
2025-07-01 05:49:22.013
2025-07-01 05:49:22.021 # pump out diffs from after the synch point
2025-07-01 05:49:22.028 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:22.035
2025-07-01 05:49:22.042 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:22.054 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:22.060
2025-07-01 05:49:22.067 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:22.074 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:22.086 alo = 364, ahi = 1101
2025-07-01 05:49:22.095 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:22.104 blo = 364, bhi = 1101
2025-07-01 05:49:22.110
2025-07-01 05:49:22.116 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:22.123 g = []
2025-07-01 05:49:22.130 if alo < ahi:
2025-07-01 05:49:22.136 if blo < bhi:
2025-07-01 05:49:22.142 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:22.148 else:
2025-07-01 05:49:22.155 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:22.166 elif blo < bhi:
2025-07-01 05:49:22.175 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:22.186
2025-07-01 05:49:22.195 > yield from g
2025-07-01 05:49:22.206
2025-07-01 05:49:22.213 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:22.219 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:22.226
2025-07-01 05:49:22.234 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:22.241 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:22.247 alo = 364, ahi = 1101
2025-07-01 05:49:22.252 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:22.258 blo = 364, bhi = 1101
2025-07-01 05:49:22.263
2025-07-01 05:49:22.267 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:22.272 r"""
2025-07-01 05:49:22.277 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:22.282 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:22.286 synch point, and intraline difference marking is done on the
2025-07-01 05:49:22.292 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:22.301
2025-07-01 05:49:22.312 Example:
2025-07-01 05:49:22.321
2025-07-01 05:49:22.334 >>> d = Differ()
2025-07-01 05:49:22.346 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:22.357 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:22.370 >>> print(''.join(results), end="")
2025-07-01 05:49:22.377 - abcDefghiJkl
2025-07-01 05:49:22.392 + abcdefGhijkl
2025-07-01 05:49:22.410 """
2025-07-01 05:49:22.418
2025-07-01 05:49:22.426 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:22.439 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:22.449 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:22.458 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:22.469 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:22.480
2025-07-01 05:49:22.489 # search for the pair that matches best without being identical
2025-07-01 05:49:22.498 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:22.504 # on junk -- unless we have to)
2025-07-01 05:49:22.510 for j in range(blo, bhi):
2025-07-01 05:49:22.521 bj = b[j]
2025-07-01 05:49:22.532 cruncher.set_seq2(bj)
2025-07-01 05:49:22.539 for i in range(alo, ahi):
2025-07-01 05:49:22.547 ai = a[i]
2025-07-01 05:49:22.556 if ai == bj:
2025-07-01 05:49:22.568 if eqi is None:
2025-07-01 05:49:22.577 eqi, eqj = i, j
2025-07-01 05:49:22.583 continue
2025-07-01 05:49:22.589 cruncher.set_seq1(ai)
2025-07-01 05:49:22.595 # computing similarity is expensive, so use the quick
2025-07-01 05:49:22.600 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:22.605 # compares by a factor of 3.
2025-07-01 05:49:22.610 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:22.615 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:22.620 # of the computation is cached by cruncher
2025-07-01 05:49:22.628 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:22.635 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:22.642 cruncher.ratio() > best_ratio:
2025-07-01 05:49:22.651 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:22.662 if best_ratio < cutoff:
2025-07-01 05:49:22.673 # no non-identical "pretty close" pair
2025-07-01 05:49:22.685 if eqi is None:
2025-07-01 05:49:22.692 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:22.698 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:22.702 return
2025-07-01 05:49:22.708 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:22.714 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:22.719 else:
2025-07-01 05:49:22.724 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:22.730 eqi = None
2025-07-01 05:49:22.740
2025-07-01 05:49:22.750 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:22.760 # identical
2025-07-01 05:49:22.772
2025-07-01 05:49:22.782 # pump out diffs from before the synch point
2025-07-01 05:49:22.793 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:22.803
2025-07-01 05:49:22.812 # do intraline marking on the synch pair
2025-07-01 05:49:22.819 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:22.826 if eqi is None:
2025-07-01 05:49:22.839 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:22.850 atags = btags = ""
2025-07-01 05:49:22.859 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:22.870 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:22.881 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:22.889 if tag == 'replace':
2025-07-01 05:49:22.897 atags += '^' * la
2025-07-01 05:49:22.903 btags += '^' * lb
2025-07-01 05:49:22.910 elif tag == 'delete':
2025-07-01 05:49:22.916 atags += '-' * la
2025-07-01 05:49:22.925 elif tag == 'insert':
2025-07-01 05:49:22.936 btags += '+' * lb
2025-07-01 05:49:22.947 elif tag == 'equal':
2025-07-01 05:49:22.958 atags += ' ' * la
2025-07-01 05:49:22.968 btags += ' ' * lb
2025-07-01 05:49:22.976 else:
2025-07-01 05:49:22.981 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:22.990 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:22.999 else:
2025-07-01 05:49:23.007 # the synch pair is identical
2025-07-01 05:49:23.017 yield ' ' + aelt
2025-07-01 05:49:23.024
2025-07-01 05:49:23.031 # pump out diffs from after the synch point
2025-07-01 05:49:23.039 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:23.050
2025-07-01 05:49:23.058 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:23.067 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:23.073
2025-07-01 05:49:23.079 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:23.087 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:23.100 alo = 365, ahi = 1101
2025-07-01 05:49:23.112 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:23.120 blo = 365, bhi = 1101
2025-07-01 05:49:23.127
2025-07-01 05:49:23.135 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:23.146 g = []
2025-07-01 05:49:23.154 if alo < ahi:
2025-07-01 05:49:23.161 if blo < bhi:
2025-07-01 05:49:23.177 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:23.185 else:
2025-07-01 05:49:23.193 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:23.203 elif blo < bhi:
2025-07-01 05:49:23.216 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:23.226
2025-07-01 05:49:23.236 > yield from g
2025-07-01 05:49:23.246
2025-07-01 05:49:23.254 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:23.262 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:23.268
2025-07-01 05:49:23.274 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:23.281 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:23.287 alo = 365, ahi = 1101
2025-07-01 05:49:23.294 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:23.300 blo = 365, bhi = 1101
2025-07-01 05:49:23.306
2025-07-01 05:49:23.311 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:23.317 r"""
2025-07-01 05:49:23.324 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:23.330 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:23.336 synch point, and intraline difference marking is done on the
2025-07-01 05:49:23.345 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:23.355
2025-07-01 05:49:23.364 Example:
2025-07-01 05:49:23.372
2025-07-01 05:49:23.385 >>> d = Differ()
2025-07-01 05:49:23.396 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:23.404 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:23.412 >>> print(''.join(results), end="")
2025-07-01 05:49:23.418 - abcDefghiJkl
2025-07-01 05:49:23.438 + abcdefGhijkl
2025-07-01 05:49:23.452 """
2025-07-01 05:49:23.458
2025-07-01 05:49:23.468 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:23.479 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:23.488 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:23.496 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:23.507 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:23.523
2025-07-01 05:49:23.536 # search for the pair that matches best without being identical
2025-07-01 05:49:23.550 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:23.562 # on junk -- unless we have to)
2025-07-01 05:49:23.572 for j in range(blo, bhi):
2025-07-01 05:49:23.580 bj = b[j]
2025-07-01 05:49:23.588 cruncher.set_seq2(bj)
2025-07-01 05:49:23.595 for i in range(alo, ahi):
2025-07-01 05:49:23.608 ai = a[i]
2025-07-01 05:49:23.617 if ai == bj:
2025-07-01 05:49:23.626 if eqi is None:
2025-07-01 05:49:23.633 eqi, eqj = i, j
2025-07-01 05:49:23.639 continue
2025-07-01 05:49:23.648 cruncher.set_seq1(ai)
2025-07-01 05:49:23.658 # computing similarity is expensive, so use the quick
2025-07-01 05:49:23.665 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:23.670 # compares by a factor of 3.
2025-07-01 05:49:23.679 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:23.687 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:23.694 # of the computation is cached by cruncher
2025-07-01 05:49:23.701 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:23.707 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:23.719 cruncher.ratio() > best_ratio:
2025-07-01 05:49:23.729 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:23.736 if best_ratio < cutoff:
2025-07-01 05:49:23.742 # no non-identical "pretty close" pair
2025-07-01 05:49:23.747 if eqi is None:
2025-07-01 05:49:23.752 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:23.757 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:23.763 return
2025-07-01 05:49:23.769 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:23.776 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:23.783 else:
2025-07-01 05:49:23.788 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:23.793 eqi = None
2025-07-01 05:49:23.798
2025-07-01 05:49:23.804 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:23.809 # identical
2025-07-01 05:49:23.815
2025-07-01 05:49:23.820 # pump out diffs from before the synch point
2025-07-01 05:49:23.826 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:23.832
2025-07-01 05:49:23.837 # do intraline marking on the synch pair
2025-07-01 05:49:23.851 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:23.861 if eqi is None:
2025-07-01 05:49:23.869 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:23.876 atags = btags = ""
2025-07-01 05:49:23.887 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:23.898 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:23.909 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:23.920 if tag == 'replace':
2025-07-01 05:49:23.931 atags += '^' * la
2025-07-01 05:49:23.944 btags += '^' * lb
2025-07-01 05:49:23.954 elif tag == 'delete':
2025-07-01 05:49:23.963 atags += '-' * la
2025-07-01 05:49:23.975 elif tag == 'insert':
2025-07-01 05:49:23.984 btags += '+' * lb
2025-07-01 05:49:23.992 elif tag == 'equal':
2025-07-01 05:49:23.999 atags += ' ' * la
2025-07-01 05:49:24.007 btags += ' ' * lb
2025-07-01 05:49:24.014 else:
2025-07-01 05:49:24.024 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:24.034 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:24.041 else:
2025-07-01 05:49:24.048 # the synch pair is identical
2025-07-01 05:49:24.054 yield ' ' + aelt
2025-07-01 05:49:24.060
2025-07-01 05:49:24.066 # pump out diffs from after the synch point
2025-07-01 05:49:24.077 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:24.088
2025-07-01 05:49:24.097 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:24.109 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:24.117
2025-07-01 05:49:24.126 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:24.135 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:24.144 alo = 366, ahi = 1101
2025-07-01 05:49:24.153 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:24.160 blo = 366, bhi = 1101
2025-07-01 05:49:24.166
2025-07-01 05:49:24.173 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:24.178 g = []
2025-07-01 05:49:24.184 if alo < ahi:
2025-07-01 05:49:24.190 if blo < bhi:
2025-07-01 05:49:24.203 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:24.211 else:
2025-07-01 05:49:24.223 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:24.232 elif blo < bhi:
2025-07-01 05:49:24.243 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:24.251
2025-07-01 05:49:24.260 > yield from g
2025-07-01 05:49:24.268
2025-07-01 05:49:24.275 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:24.282 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:24.288
2025-07-01 05:49:24.294 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:24.303 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:24.313 alo = 366, ahi = 1101
2025-07-01 05:49:24.322 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:24.330 blo = 366, bhi = 1101
2025-07-01 05:49:24.340
2025-07-01 05:49:24.353 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:24.365 r"""
2025-07-01 05:49:24.376 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:24.385 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:24.393 synch point, and intraline difference marking is done on the
2025-07-01 05:49:24.399 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:24.404
2025-07-01 05:49:24.408 Example:
2025-07-01 05:49:24.420
2025-07-01 05:49:24.430 >>> d = Differ()
2025-07-01 05:49:24.438 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:24.445 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:24.453 >>> print(''.join(results), end="")
2025-07-01 05:49:24.459 - abcDefghiJkl
2025-07-01 05:49:24.475 + abcdefGhijkl
2025-07-01 05:49:24.494 """
2025-07-01 05:49:24.501
2025-07-01 05:49:24.508 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:24.516 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:24.522 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:24.528 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:24.534 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:24.547
2025-07-01 05:49:24.559 # search for the pair that matches best without being identical
2025-07-01 05:49:24.571 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:24.581 # on junk -- unless we have to)
2025-07-01 05:49:24.591 for j in range(blo, bhi):
2025-07-01 05:49:24.602 bj = b[j]
2025-07-01 05:49:24.611 cruncher.set_seq2(bj)
2025-07-01 05:49:24.621 for i in range(alo, ahi):
2025-07-01 05:49:24.632 ai = a[i]
2025-07-01 05:49:24.642 if ai == bj:
2025-07-01 05:49:24.652 if eqi is None:
2025-07-01 05:49:24.660 eqi, eqj = i, j
2025-07-01 05:49:24.667 continue
2025-07-01 05:49:24.679 cruncher.set_seq1(ai)
2025-07-01 05:49:24.690 # computing similarity is expensive, so use the quick
2025-07-01 05:49:24.700 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:24.707 # compares by a factor of 3.
2025-07-01 05:49:24.713 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:24.718 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:24.723 # of the computation is cached by cruncher
2025-07-01 05:49:24.729 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:24.736 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:24.742 cruncher.ratio() > best_ratio:
2025-07-01 05:49:24.750 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:24.758 if best_ratio < cutoff:
2025-07-01 05:49:24.766 # no non-identical "pretty close" pair
2025-07-01 05:49:24.773 if eqi is None:
2025-07-01 05:49:24.779 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:24.786 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:24.792 return
2025-07-01 05:49:24.798 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:24.804 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:24.813 else:
2025-07-01 05:49:24.825 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:24.837 eqi = None
2025-07-01 05:49:24.850
2025-07-01 05:49:24.862 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:24.875 # identical
2025-07-01 05:49:24.885
2025-07-01 05:49:24.893 # pump out diffs from before the synch point
2025-07-01 05:49:24.900 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:24.907
2025-07-01 05:49:24.913 # do intraline marking on the synch pair
2025-07-01 05:49:24.919 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:24.925 if eqi is None:
2025-07-01 05:49:24.930 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:24.936 atags = btags = ""
2025-07-01 05:49:24.942 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:24.948 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:24.954 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:24.959 if tag == 'replace':
2025-07-01 05:49:24.967 atags += '^' * la
2025-07-01 05:49:24.978 btags += '^' * lb
2025-07-01 05:49:24.986 elif tag == 'delete':
2025-07-01 05:49:24.993 atags += '-' * la
2025-07-01 05:49:24.998 elif tag == 'insert':
2025-07-01 05:49:25.004 btags += '+' * lb
2025-07-01 05:49:25.011 elif tag == 'equal':
2025-07-01 05:49:25.022 atags += ' ' * la
2025-07-01 05:49:25.032 btags += ' ' * lb
2025-07-01 05:49:25.040 else:
2025-07-01 05:49:25.047 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:25.053 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:25.058 else:
2025-07-01 05:49:25.064 # the synch pair is identical
2025-07-01 05:49:25.072 yield ' ' + aelt
2025-07-01 05:49:25.079
2025-07-01 05:49:25.089 # pump out diffs from after the synch point
2025-07-01 05:49:25.100 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:25.109
2025-07-01 05:49:25.121 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:25.133 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:25.144
2025-07-01 05:49:25.155 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:25.166 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:25.177 alo = 367, ahi = 1101
2025-07-01 05:49:25.189 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:25.201 blo = 367, bhi = 1101
2025-07-01 05:49:25.212
2025-07-01 05:49:25.222 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:25.232 g = []
2025-07-01 05:49:25.244 if alo < ahi:
2025-07-01 05:49:25.253 if blo < bhi:
2025-07-01 05:49:25.266 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:25.277 else:
2025-07-01 05:49:25.287 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:25.295 elif blo < bhi:
2025-07-01 05:49:25.303 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:25.309
2025-07-01 05:49:25.314 > yield from g
2025-07-01 05:49:25.324
2025-07-01 05:49:25.334 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:25.343 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:25.352
2025-07-01 05:49:25.360 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:25.369 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:25.375 alo = 367, ahi = 1101
2025-07-01 05:49:25.382 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:25.388 blo = 367, bhi = 1101
2025-07-01 05:49:25.394
2025-07-01 05:49:25.400 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:25.406 r"""
2025-07-01 05:49:25.416 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:25.425 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:25.433 synch point, and intraline difference marking is done on the
2025-07-01 05:49:25.439 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:25.447
2025-07-01 05:49:25.457 Example:
2025-07-01 05:49:25.468
2025-07-01 05:49:25.479 >>> d = Differ()
2025-07-01 05:49:25.487 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:25.494 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:25.501 >>> print(''.join(results), end="")
2025-07-01 05:49:25.507 - abcDefghiJkl
2025-07-01 05:49:25.527 + abcdefGhijkl
2025-07-01 05:49:25.545 """
2025-07-01 05:49:25.552
2025-07-01 05:49:25.564 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:25.571 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:25.577 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:25.582 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:25.588 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:25.594
2025-07-01 05:49:25.599 # search for the pair that matches best without being identical
2025-07-01 05:49:25.605 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:25.611 # on junk -- unless we have to)
2025-07-01 05:49:25.617 for j in range(blo, bhi):
2025-07-01 05:49:25.622 bj = b[j]
2025-07-01 05:49:25.628 cruncher.set_seq2(bj)
2025-07-01 05:49:25.634 for i in range(alo, ahi):
2025-07-01 05:49:25.639 ai = a[i]
2025-07-01 05:49:25.645 if ai == bj:
2025-07-01 05:49:25.651 if eqi is None:
2025-07-01 05:49:25.656 eqi, eqj = i, j
2025-07-01 05:49:25.662 continue
2025-07-01 05:49:25.668 cruncher.set_seq1(ai)
2025-07-01 05:49:25.675 # computing similarity is expensive, so use the quick
2025-07-01 05:49:25.680 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:25.686 # compares by a factor of 3.
2025-07-01 05:49:25.692 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:25.697 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:25.708 # of the computation is cached by cruncher
2025-07-01 05:49:25.718 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:25.727 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:25.734 cruncher.ratio() > best_ratio:
2025-07-01 05:49:25.743 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:25.754 if best_ratio < cutoff:
2025-07-01 05:49:25.762 # no non-identical "pretty close" pair
2025-07-01 05:49:25.768 if eqi is None:
2025-07-01 05:49:25.774 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:25.779 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:25.783 return
2025-07-01 05:49:25.788 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:25.793 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:25.800 else:
2025-07-01 05:49:25.811 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:25.822 eqi = None
2025-07-01 05:49:25.831
2025-07-01 05:49:25.838 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:25.845 # identical
2025-07-01 05:49:25.851
2025-07-01 05:49:25.857 # pump out diffs from before the synch point
2025-07-01 05:49:25.862 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:25.866
2025-07-01 05:49:25.871 # do intraline marking on the synch pair
2025-07-01 05:49:25.876 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:25.880 if eqi is None:
2025-07-01 05:49:25.885 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:25.889 atags = btags = ""
2025-07-01 05:49:25.894 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:25.898 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:25.902 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:25.907 if tag == 'replace':
2025-07-01 05:49:25.911 atags += '^' * la
2025-07-01 05:49:25.923 btags += '^' * lb
2025-07-01 05:49:25.932 elif tag == 'delete':
2025-07-01 05:49:25.941 atags += '-' * la
2025-07-01 05:49:25.949 elif tag == 'insert':
2025-07-01 05:49:25.956 btags += '+' * lb
2025-07-01 05:49:25.963 elif tag == 'equal':
2025-07-01 05:49:25.971 atags += ' ' * la
2025-07-01 05:49:25.983 btags += ' ' * lb
2025-07-01 05:49:25.991 else:
2025-07-01 05:49:25.999 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:26.006 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:26.015 else:
2025-07-01 05:49:26.026 # the synch pair is identical
2025-07-01 05:49:26.034 yield ' ' + aelt
2025-07-01 05:49:26.042
2025-07-01 05:49:26.054 # pump out diffs from after the synch point
2025-07-01 05:49:26.066 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:26.078
2025-07-01 05:49:26.090 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:26.103 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:26.113
2025-07-01 05:49:26.125 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:26.135 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:26.144 alo = 368, ahi = 1101
2025-07-01 05:49:26.155 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:26.163 blo = 368, bhi = 1101
2025-07-01 05:49:26.169
2025-07-01 05:49:26.174 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:26.180 g = []
2025-07-01 05:49:26.184 if alo < ahi:
2025-07-01 05:49:26.189 if blo < bhi:
2025-07-01 05:49:26.194 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:26.199 else:
2025-07-01 05:49:26.205 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:26.212 elif blo < bhi:
2025-07-01 05:49:26.219 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:26.226
2025-07-01 05:49:26.237 > yield from g
2025-07-01 05:49:26.245
2025-07-01 05:49:26.252 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:26.257 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:26.264
2025-07-01 05:49:26.274 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:26.285 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:26.297 alo = 368, ahi = 1101
2025-07-01 05:49:26.309 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:26.319 blo = 368, bhi = 1101
2025-07-01 05:49:26.329
2025-07-01 05:49:26.340 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:26.350 r"""
2025-07-01 05:49:26.357 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:26.364 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:26.376 synch point, and intraline difference marking is done on the
2025-07-01 05:49:26.387 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:26.396
2025-07-01 05:49:26.403 Example:
2025-07-01 05:49:26.410
2025-07-01 05:49:26.421 >>> d = Differ()
2025-07-01 05:49:26.429 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:26.442 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:26.452 >>> print(''.join(results), end="")
2025-07-01 05:49:26.459 - abcDefghiJkl
2025-07-01 05:49:26.470 + abcdefGhijkl
2025-07-01 05:49:26.481 """
2025-07-01 05:49:26.487
2025-07-01 05:49:26.497 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:26.510 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:26.521 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:26.530 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:26.538 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:26.544
2025-07-01 05:49:26.552 # search for the pair that matches best without being identical
2025-07-01 05:49:26.565 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:26.573 # on junk -- unless we have to)
2025-07-01 05:49:26.579 for j in range(blo, bhi):
2025-07-01 05:49:26.584 bj = b[j]
2025-07-01 05:49:26.590 cruncher.set_seq2(bj)
2025-07-01 05:49:26.603 for i in range(alo, ahi):
2025-07-01 05:49:26.612 ai = a[i]
2025-07-01 05:49:26.619 if ai == bj:
2025-07-01 05:49:26.625 if eqi is None:
2025-07-01 05:49:26.631 eqi, eqj = i, j
2025-07-01 05:49:26.643 continue
2025-07-01 05:49:26.652 cruncher.set_seq1(ai)
2025-07-01 05:49:26.659 # computing similarity is expensive, so use the quick
2025-07-01 05:49:26.666 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:26.673 # compares by a factor of 3.
2025-07-01 05:49:26.679 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:26.685 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:26.691 # of the computation is cached by cruncher
2025-07-01 05:49:26.697 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:26.703 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:26.709 cruncher.ratio() > best_ratio:
2025-07-01 05:49:26.715 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:26.721 if best_ratio < cutoff:
2025-07-01 05:49:26.727 # no non-identical "pretty close" pair
2025-07-01 05:49:26.737 if eqi is None:
2025-07-01 05:49:26.749 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:26.759 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:26.767 return
2025-07-01 05:49:26.774 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:26.781 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:26.786 else:
2025-07-01 05:49:26.792 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:26.797 eqi = None
2025-07-01 05:49:26.803
2025-07-01 05:49:26.809 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:26.815 # identical
2025-07-01 05:49:26.820
2025-07-01 05:49:26.827 # pump out diffs from before the synch point
2025-07-01 05:49:26.834 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:26.840
2025-07-01 05:49:26.847 # do intraline marking on the synch pair
2025-07-01 05:49:26.855 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:26.865 if eqi is None:
2025-07-01 05:49:26.873 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:26.880 atags = btags = ""
2025-07-01 05:49:26.886 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:26.891 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:26.897 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:26.903 if tag == 'replace':
2025-07-01 05:49:26.911 atags += '^' * la
2025-07-01 05:49:26.922 btags += '^' * lb
2025-07-01 05:49:26.930 elif tag == 'delete':
2025-07-01 05:49:26.936 atags += '-' * la
2025-07-01 05:49:26.942 elif tag == 'insert':
2025-07-01 05:49:26.947 btags += '+' * lb
2025-07-01 05:49:26.952 elif tag == 'equal':
2025-07-01 05:49:26.958 atags += ' ' * la
2025-07-01 05:49:26.964 btags += ' ' * lb
2025-07-01 05:49:26.970 else:
2025-07-01 05:49:26.976 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:26.982 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:26.990 else:
2025-07-01 05:49:26.998 # the synch pair is identical
2025-07-01 05:49:27.004 yield ' ' + aelt
2025-07-01 05:49:27.010
2025-07-01 05:49:27.016 # pump out diffs from after the synch point
2025-07-01 05:49:27.021 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:27.027
2025-07-01 05:49:27.033 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:27.039 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:27.047
2025-07-01 05:49:27.058 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:27.068 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:27.076 alo = 369, ahi = 1101
2025-07-01 05:49:27.082 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:27.087 blo = 369, bhi = 1101
2025-07-01 05:49:27.092
2025-07-01 05:49:27.097 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:27.102 g = []
2025-07-01 05:49:27.107 if alo < ahi:
2025-07-01 05:49:27.112 if blo < bhi:
2025-07-01 05:49:27.118 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:27.124 else:
2025-07-01 05:49:27.129 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:27.134 elif blo < bhi:
2025-07-01 05:49:27.144 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:27.155
2025-07-01 05:49:27.163 > yield from g
2025-07-01 05:49:27.169
2025-07-01 05:49:27.176 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:27.183 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:27.189
2025-07-01 05:49:27.196 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:27.208 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:27.217 alo = 369, ahi = 1101
2025-07-01 05:49:27.225 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:27.231 blo = 369, bhi = 1101
2025-07-01 05:49:27.236
2025-07-01 05:49:27.242 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:27.248 r"""
2025-07-01 05:49:27.254 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:27.260 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:27.265 synch point, and intraline difference marking is done on the
2025-07-01 05:49:27.271 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:27.276
2025-07-01 05:49:27.282 Example:
2025-07-01 05:49:27.290
2025-07-01 05:49:27.299 >>> d = Differ()
2025-07-01 05:49:27.306 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:27.313 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:27.319 >>> print(''.join(results), end="")
2025-07-01 05:49:27.327 - abcDefghiJkl
2025-07-01 05:49:27.348 + abcdefGhijkl
2025-07-01 05:49:27.361 """
2025-07-01 05:49:27.367
2025-07-01 05:49:27.373 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:27.384 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:27.389 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:27.395 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:27.401 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:27.408
2025-07-01 05:49:27.412 # search for the pair that matches best without being identical
2025-07-01 05:49:27.417 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:27.422 # on junk -- unless we have to)
2025-07-01 05:49:27.428 for j in range(blo, bhi):
2025-07-01 05:49:27.434 bj = b[j]
2025-07-01 05:49:27.440 cruncher.set_seq2(bj)
2025-07-01 05:49:27.446 for i in range(alo, ahi):
2025-07-01 05:49:27.457 ai = a[i]
2025-07-01 05:49:27.466 if ai == bj:
2025-07-01 05:49:27.474 if eqi is None:
2025-07-01 05:49:27.480 eqi, eqj = i, j
2025-07-01 05:49:27.484 continue
2025-07-01 05:49:27.489 cruncher.set_seq1(ai)
2025-07-01 05:49:27.495 # computing similarity is expensive, so use the quick
2025-07-01 05:49:27.500 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:27.506 # compares by a factor of 3.
2025-07-01 05:49:27.512 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:27.518 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:27.524 # of the computation is cached by cruncher
2025-07-01 05:49:27.531 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:27.538 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:27.545 cruncher.ratio() > best_ratio:
2025-07-01 05:49:27.557 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:27.566 if best_ratio < cutoff:
2025-07-01 05:49:27.572 # no non-identical "pretty close" pair
2025-07-01 05:49:27.578 if eqi is None:
2025-07-01 05:49:27.584 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:27.589 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:27.596 return
2025-07-01 05:49:27.602 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:27.608 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:27.614 else:
2025-07-01 05:49:27.620 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:27.626 eqi = None
2025-07-01 05:49:27.632
2025-07-01 05:49:27.639 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:27.645 # identical
2025-07-01 05:49:27.652
2025-07-01 05:49:27.658 # pump out diffs from before the synch point
2025-07-01 05:49:27.667 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:27.677
2025-07-01 05:49:27.686 # do intraline marking on the synch pair
2025-07-01 05:49:27.693 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:27.699 if eqi is None:
2025-07-01 05:49:27.704 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:27.708 atags = btags = ""
2025-07-01 05:49:27.714 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:27.721 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:27.727 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:27.733 if tag == 'replace':
2025-07-01 05:49:27.738 atags += '^' * la
2025-07-01 05:49:27.743 btags += '^' * lb
2025-07-01 05:49:27.753 elif tag == 'delete':
2025-07-01 05:49:27.763 atags += '-' * la
2025-07-01 05:49:27.770 elif tag == 'insert':
2025-07-01 05:49:27.780 btags += '+' * lb
2025-07-01 05:49:27.791 elif tag == 'equal':
2025-07-01 05:49:27.802 atags += ' ' * la
2025-07-01 05:49:27.814 btags += ' ' * lb
2025-07-01 05:49:27.822 else:
2025-07-01 05:49:27.831 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:27.838 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:27.845 else:
2025-07-01 05:49:27.851 # the synch pair is identical
2025-07-01 05:49:27.857 yield ' ' + aelt
2025-07-01 05:49:27.863
2025-07-01 05:49:27.873 # pump out diffs from after the synch point
2025-07-01 05:49:27.880 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:27.886
2025-07-01 05:49:27.892 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:27.899 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:27.904
2025-07-01 05:49:27.913 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:27.926 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:27.936 alo = 370, ahi = 1101
2025-07-01 05:49:27.945 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:27.951 blo = 370, bhi = 1101
2025-07-01 05:49:27.959
2025-07-01 05:49:27.970 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:27.978 g = []
2025-07-01 05:49:27.986 if alo < ahi:
2025-07-01 05:49:27.991 if blo < bhi:
2025-07-01 05:49:27.999 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:28.011 else:
2025-07-01 05:49:28.018 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:28.026 elif blo < bhi:
2025-07-01 05:49:28.034 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:28.045
2025-07-01 05:49:28.054 > yield from g
2025-07-01 05:49:28.066
2025-07-01 05:49:28.075 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:28.083 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:28.090
2025-07-01 05:49:28.097 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:28.108 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:28.117 alo = 370, ahi = 1101
2025-07-01 05:49:28.127 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:28.135 blo = 370, bhi = 1101
2025-07-01 05:49:28.143
2025-07-01 05:49:28.154 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:28.165 r"""
2025-07-01 05:49:28.176 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:28.184 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:28.191 synch point, and intraline difference marking is done on the
2025-07-01 05:49:28.197 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:28.202
2025-07-01 05:49:28.212 Example:
2025-07-01 05:49:28.221
2025-07-01 05:49:28.229 >>> d = Differ()
2025-07-01 05:49:28.235 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:28.243 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:28.253 >>> print(''.join(results), end="")
2025-07-01 05:49:28.261 - abcDefghiJkl
2025-07-01 05:49:28.274 + abcdefGhijkl
2025-07-01 05:49:28.286 """
2025-07-01 05:49:28.291
2025-07-01 05:49:28.298 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:28.310 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:28.319 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:28.326 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:28.336 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:28.345
2025-07-01 05:49:28.353 # search for the pair that matches best without being identical
2025-07-01 05:49:28.359 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:28.365 # on junk -- unless we have to)
2025-07-01 05:49:28.371 for j in range(blo, bhi):
2025-07-01 05:49:28.379 bj = b[j]
2025-07-01 05:49:28.391 cruncher.set_seq2(bj)
2025-07-01 05:49:28.398 for i in range(alo, ahi):
2025-07-01 05:49:28.405 ai = a[i]
2025-07-01 05:49:28.411 if ai == bj:
2025-07-01 05:49:28.415 if eqi is None:
2025-07-01 05:49:28.420 eqi, eqj = i, j
2025-07-01 05:49:28.424 continue
2025-07-01 05:49:28.429 cruncher.set_seq1(ai)
2025-07-01 05:49:28.435 # computing similarity is expensive, so use the quick
2025-07-01 05:49:28.440 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:28.446 # compares by a factor of 3.
2025-07-01 05:49:28.452 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:28.458 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:28.466 # of the computation is cached by cruncher
2025-07-01 05:49:28.474 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:28.480 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:28.486 cruncher.ratio() > best_ratio:
2025-07-01 05:49:28.491 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:28.496 if best_ratio < cutoff:
2025-07-01 05:49:28.500 # no non-identical "pretty close" pair
2025-07-01 05:49:28.505 if eqi is None:
2025-07-01 05:49:28.511 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:28.516 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:28.522 return
2025-07-01 05:49:28.530 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:28.536 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:28.541 else:
2025-07-01 05:49:28.546 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:28.551 eqi = None
2025-07-01 05:49:28.556
2025-07-01 05:49:28.562 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:28.567 # identical
2025-07-01 05:49:28.573
2025-07-01 05:49:28.579 # pump out diffs from before the synch point
2025-07-01 05:49:28.586 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:28.595
2025-07-01 05:49:28.608 # do intraline marking on the synch pair
2025-07-01 05:49:28.619 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:28.628 if eqi is None:
2025-07-01 05:49:28.634 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:28.639 atags = btags = ""
2025-07-01 05:49:28.643 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:28.650 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:28.658 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:28.668 if tag == 'replace':
2025-07-01 05:49:28.676 atags += '^' * la
2025-07-01 05:49:28.682 btags += '^' * lb
2025-07-01 05:49:28.688 elif tag == 'delete':
2025-07-01 05:49:28.693 atags += '-' * la
2025-07-01 05:49:28.700 elif tag == 'insert':
2025-07-01 05:49:28.706 btags += '+' * lb
2025-07-01 05:49:28.712 elif tag == 'equal':
2025-07-01 05:49:28.718 atags += ' ' * la
2025-07-01 05:49:28.725 btags += ' ' * lb
2025-07-01 05:49:28.731 else:
2025-07-01 05:49:28.737 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:28.743 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:28.750 else:
2025-07-01 05:49:28.756 # the synch pair is identical
2025-07-01 05:49:28.762 yield ' ' + aelt
2025-07-01 05:49:28.768
2025-07-01 05:49:28.777 # pump out diffs from after the synch point
2025-07-01 05:49:28.785 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:28.792
2025-07-01 05:49:28.798 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:28.807 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:28.818
2025-07-01 05:49:28.826 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:28.833 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:28.838 alo = 371, ahi = 1101
2025-07-01 05:49:28.846 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:28.852 blo = 371, bhi = 1101
2025-07-01 05:49:28.857
2025-07-01 05:49:28.871 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:28.883 g = []
2025-07-01 05:49:28.893 if alo < ahi:
2025-07-01 05:49:28.904 if blo < bhi:
2025-07-01 05:49:28.916 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:28.925 else:
2025-07-01 05:49:28.937 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:28.947 elif blo < bhi:
2025-07-01 05:49:28.956 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:28.962
2025-07-01 05:49:28.974 > yield from g
2025-07-01 05:49:28.984
2025-07-01 05:49:28.993 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:29.000 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:29.008
2025-07-01 05:49:29.014 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:29.020 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:29.025 alo = 371, ahi = 1101
2025-07-01 05:49:29.030 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:29.035 blo = 371, bhi = 1101
2025-07-01 05:49:29.039
2025-07-01 05:49:29.045 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:29.050 r"""
2025-07-01 05:49:29.060 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:29.068 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:29.076 synch point, and intraline difference marking is done on the
2025-07-01 05:49:29.083 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:29.090
2025-07-01 05:49:29.101 Example:
2025-07-01 05:49:29.109
2025-07-01 05:49:29.116 >>> d = Differ()
2025-07-01 05:49:29.123 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:29.129 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:29.135 >>> print(''.join(results), end="")
2025-07-01 05:49:29.140 - abcDefghiJkl
2025-07-01 05:49:29.156 + abcdefGhijkl
2025-07-01 05:49:29.173 """
2025-07-01 05:49:29.179
2025-07-01 05:49:29.186 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:29.196 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:29.204 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:29.211 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:29.219 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:29.229
2025-07-01 05:49:29.239 # search for the pair that matches best without being identical
2025-07-01 05:49:29.247 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:29.255 # on junk -- unless we have to)
2025-07-01 05:49:29.265 for j in range(blo, bhi):
2025-07-01 05:49:29.273 bj = b[j]
2025-07-01 05:49:29.280 cruncher.set_seq2(bj)
2025-07-01 05:49:29.287 for i in range(alo, ahi):
2025-07-01 05:49:29.295 ai = a[i]
2025-07-01 05:49:29.305 if ai == bj:
2025-07-01 05:49:29.315 if eqi is None:
2025-07-01 05:49:29.326 eqi, eqj = i, j
2025-07-01 05:49:29.338 continue
2025-07-01 05:49:29.349 cruncher.set_seq1(ai)
2025-07-01 05:49:29.358 # computing similarity is expensive, so use the quick
2025-07-01 05:49:29.365 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:29.371 # compares by a factor of 3.
2025-07-01 05:49:29.378 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:29.384 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:29.391 # of the computation is cached by cruncher
2025-07-01 05:49:29.399 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:29.409 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:29.418 cruncher.ratio() > best_ratio:
2025-07-01 05:49:29.427 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:29.435 if best_ratio < cutoff:
2025-07-01 05:49:29.442 # no non-identical "pretty close" pair
2025-07-01 05:49:29.451 if eqi is None:
2025-07-01 05:49:29.464 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:29.474 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:29.482 return
2025-07-01 05:49:29.490 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:29.501 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:29.513 else:
2025-07-01 05:49:29.527 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:29.539 eqi = None
2025-07-01 05:49:29.547
2025-07-01 05:49:29.553 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:29.560 # identical
2025-07-01 05:49:29.564
2025-07-01 05:49:29.569 # pump out diffs from before the synch point
2025-07-01 05:49:29.573 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:29.578
2025-07-01 05:49:29.582 # do intraline marking on the synch pair
2025-07-01 05:49:29.590 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:29.602 if eqi is None:
2025-07-01 05:49:29.612 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:29.619 atags = btags = ""
2025-07-01 05:49:29.626 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:29.638 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:29.647 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:29.653 if tag == 'replace':
2025-07-01 05:49:29.661 atags += '^' * la
2025-07-01 05:49:29.669 btags += '^' * lb
2025-07-01 05:49:29.674 elif tag == 'delete':
2025-07-01 05:49:29.686 atags += '-' * la
2025-07-01 05:49:29.696 elif tag == 'insert':
2025-07-01 05:49:29.707 btags += '+' * lb
2025-07-01 05:49:29.716 elif tag == 'equal':
2025-07-01 05:49:29.724 atags += ' ' * la
2025-07-01 05:49:29.731 btags += ' ' * lb
2025-07-01 05:49:29.738 else:
2025-07-01 05:49:29.749 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:29.760 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:29.767 else:
2025-07-01 05:49:29.772 # the synch pair is identical
2025-07-01 05:49:29.778 yield ' ' + aelt
2025-07-01 05:49:29.788
2025-07-01 05:49:29.798 # pump out diffs from after the synch point
2025-07-01 05:49:29.805 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:29.811
2025-07-01 05:49:29.818 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:29.829 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:29.838
2025-07-01 05:49:29.845 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:29.858 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:29.868 alo = 372, ahi = 1101
2025-07-01 05:49:29.877 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:29.884 blo = 372, bhi = 1101
2025-07-01 05:49:29.891
2025-07-01 05:49:29.899 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:29.910 g = []
2025-07-01 05:49:29.920 if alo < ahi:
2025-07-01 05:49:29.931 if blo < bhi:
2025-07-01 05:49:29.939 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:29.950 else:
2025-07-01 05:49:29.958 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:29.965 elif blo < bhi:
2025-07-01 05:49:29.973 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:29.980
2025-07-01 05:49:29.987 > yield from g
2025-07-01 05:49:29.996
2025-07-01 05:49:30.006 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:30.014 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:30.023
2025-07-01 05:49:30.033 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:30.045 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:30.055 alo = 372, ahi = 1101
2025-07-01 05:49:30.064 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:30.071 blo = 372, bhi = 1101
2025-07-01 05:49:30.077
2025-07-01 05:49:30.088 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:30.096 r"""
2025-07-01 05:49:30.104 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:30.111 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:30.117 synch point, and intraline difference marking is done on the
2025-07-01 05:49:30.123 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:30.128
2025-07-01 05:49:30.133 Example:
2025-07-01 05:49:30.139
2025-07-01 05:49:30.144 >>> d = Differ()
2025-07-01 05:49:30.150 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:30.160 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:30.169 >>> print(''.join(results), end="")
2025-07-01 05:49:30.177 - abcDefghiJkl
2025-07-01 05:49:30.190 + abcdefGhijkl
2025-07-01 05:49:30.202 """
2025-07-01 05:49:30.214
2025-07-01 05:49:30.223 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:30.230 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:30.240 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:30.248 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:30.256 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:30.263
2025-07-01 05:49:30.269 # search for the pair that matches best without being identical
2025-07-01 05:49:30.275 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:30.281 # on junk -- unless we have to)
2025-07-01 05:49:30.293 for j in range(blo, bhi):
2025-07-01 05:49:30.303 bj = b[j]
2025-07-01 05:49:30.311 cruncher.set_seq2(bj)
2025-07-01 05:49:30.323 for i in range(alo, ahi):
2025-07-01 05:49:30.335 ai = a[i]
2025-07-01 05:49:30.344 if ai == bj:
2025-07-01 05:49:30.352 if eqi is None:
2025-07-01 05:49:30.359 eqi, eqj = i, j
2025-07-01 05:49:30.366 continue
2025-07-01 05:49:30.372 cruncher.set_seq1(ai)
2025-07-01 05:49:30.378 # computing similarity is expensive, so use the quick
2025-07-01 05:49:30.384 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:30.396 # compares by a factor of 3.
2025-07-01 05:49:30.405 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:30.413 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:30.419 # of the computation is cached by cruncher
2025-07-01 05:49:30.425 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:30.430 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:30.436 cruncher.ratio() > best_ratio:
2025-07-01 05:49:30.442 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:30.448 if best_ratio < cutoff:
2025-07-01 05:49:30.453 # no non-identical "pretty close" pair
2025-07-01 05:49:30.459 if eqi is None:
2025-07-01 05:49:30.465 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:30.471 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:30.483 return
2025-07-01 05:49:30.492 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:30.500 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:30.508 else:
2025-07-01 05:49:30.516 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:30.522 eqi = None
2025-07-01 05:49:30.530
2025-07-01 05:49:30.537 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:30.543 # identical
2025-07-01 05:49:30.548
2025-07-01 05:49:30.554 # pump out diffs from before the synch point
2025-07-01 05:49:30.564 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:30.573
2025-07-01 05:49:30.584 # do intraline marking on the synch pair
2025-07-01 05:49:30.596 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:30.606 if eqi is None:
2025-07-01 05:49:30.617 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:30.629 atags = btags = ""
2025-07-01 05:49:30.639 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:30.646 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:30.657 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:30.665 if tag == 'replace':
2025-07-01 05:49:30.673 atags += '^' * la
2025-07-01 05:49:30.679 btags += '^' * lb
2025-07-01 05:49:30.687 elif tag == 'delete':
2025-07-01 05:49:30.698 atags += '-' * la
2025-07-01 05:49:30.706 elif tag == 'insert':
2025-07-01 05:49:30.713 btags += '+' * lb
2025-07-01 05:49:30.719 elif tag == 'equal':
2025-07-01 05:49:30.726 atags += ' ' * la
2025-07-01 05:49:30.733 btags += ' ' * lb
2025-07-01 05:49:30.744 else:
2025-07-01 05:49:30.757 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:30.770 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:30.779 else:
2025-07-01 05:49:30.786 # the synch pair is identical
2025-07-01 05:49:30.792 yield ' ' + aelt
2025-07-01 05:49:30.797
2025-07-01 05:49:30.804 # pump out diffs from after the synch point
2025-07-01 05:49:30.810 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:30.817
2025-07-01 05:49:30.823 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:30.830 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:30.840
2025-07-01 05:49:30.850 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:30.857 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:30.863 alo = 373, ahi = 1101
2025-07-01 05:49:30.869 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:30.874 blo = 373, bhi = 1101
2025-07-01 05:49:30.878
2025-07-01 05:49:30.884 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:30.889 g = []
2025-07-01 05:49:30.894 if alo < ahi:
2025-07-01 05:49:30.900 if blo < bhi:
2025-07-01 05:49:30.906 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:30.912 else:
2025-07-01 05:49:30.919 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:30.926 elif blo < bhi:
2025-07-01 05:49:30.938 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:30.946
2025-07-01 05:49:30.952 > yield from g
2025-07-01 05:49:30.958
2025-07-01 05:49:30.965 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:30.972 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:30.978
2025-07-01 05:49:30.985 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:30.997 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:31.006 alo = 373, ahi = 1101
2025-07-01 05:49:31.014 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:31.020 blo = 373, bhi = 1101
2025-07-01 05:49:31.026
2025-07-01 05:49:31.035 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:31.041 r"""
2025-07-01 05:49:31.047 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:31.055 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:31.065 synch point, and intraline difference marking is done on the
2025-07-01 05:49:31.073 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:31.079
2025-07-01 05:49:31.084 Example:
2025-07-01 05:49:31.089
2025-07-01 05:49:31.095 >>> d = Differ()
2025-07-01 05:49:31.100 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:31.106 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:31.117 >>> print(''.join(results), end="")
2025-07-01 05:49:31.123 - abcDefghiJkl
2025-07-01 05:49:31.139 + abcdefGhijkl
2025-07-01 05:49:31.161 """
2025-07-01 05:49:31.168
2025-07-01 05:49:31.175 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:31.182 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:31.189 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:31.196 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:31.202 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:31.208
2025-07-01 05:49:31.214 # search for the pair that matches best without being identical
2025-07-01 05:49:31.221 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:31.227 # on junk -- unless we have to)
2025-07-01 05:49:31.233 for j in range(blo, bhi):
2025-07-01 05:49:31.239 bj = b[j]
2025-07-01 05:49:31.245 cruncher.set_seq2(bj)
2025-07-01 05:49:31.251 for i in range(alo, ahi):
2025-07-01 05:49:31.258 ai = a[i]
2025-07-01 05:49:31.266 if ai == bj:
2025-07-01 05:49:31.274 if eqi is None:
2025-07-01 05:49:31.279 eqi, eqj = i, j
2025-07-01 05:49:31.284 continue
2025-07-01 05:49:31.290 cruncher.set_seq1(ai)
2025-07-01 05:49:31.295 # computing similarity is expensive, so use the quick
2025-07-01 05:49:31.300 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:31.306 # compares by a factor of 3.
2025-07-01 05:49:31.312 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:31.318 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:31.324 # of the computation is cached by cruncher
2025-07-01 05:49:31.330 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:31.336 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:31.342 cruncher.ratio() > best_ratio:
2025-07-01 05:49:31.348 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:31.354 if best_ratio < cutoff:
2025-07-01 05:49:31.360 # no non-identical "pretty close" pair
2025-07-01 05:49:31.366 if eqi is None:
2025-07-01 05:49:31.374 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:31.386 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:31.394 return
2025-07-01 05:49:31.400 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:31.406 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:31.418 else:
2025-07-01 05:49:31.425 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:31.433 eqi = None
2025-07-01 05:49:31.440
2025-07-01 05:49:31.447 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:31.454 # identical
2025-07-01 05:49:31.461
2025-07-01 05:49:31.468 # pump out diffs from before the synch point
2025-07-01 05:49:31.474 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:31.480
2025-07-01 05:49:31.488 # do intraline marking on the synch pair
2025-07-01 05:49:31.494 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:31.499 if eqi is None:
2025-07-01 05:49:31.506 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:31.512 atags = btags = ""
2025-07-01 05:49:31.517 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:31.521 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:31.527 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:31.532 if tag == 'replace':
2025-07-01 05:49:31.539 atags += '^' * la
2025-07-01 05:49:31.547 btags += '^' * lb
2025-07-01 05:49:31.555 elif tag == 'delete':
2025-07-01 05:49:31.561 atags += '-' * la
2025-07-01 05:49:31.569 elif tag == 'insert':
2025-07-01 05:49:31.582 btags += '+' * lb
2025-07-01 05:49:31.593 elif tag == 'equal':
2025-07-01 05:49:31.601 atags += ' ' * la
2025-07-01 05:49:31.608 btags += ' ' * lb
2025-07-01 05:49:31.614 else:
2025-07-01 05:49:31.621 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:31.625 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:31.630 else:
2025-07-01 05:49:31.635 # the synch pair is identical
2025-07-01 05:49:31.640 yield ' ' + aelt
2025-07-01 05:49:31.645
2025-07-01 05:49:31.651 # pump out diffs from after the synch point
2025-07-01 05:49:31.658 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:31.663
2025-07-01 05:49:31.670 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:31.678 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:31.684
2025-07-01 05:49:31.690 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:31.697 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:31.703 alo = 374, ahi = 1101
2025-07-01 05:49:31.712 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:31.724 blo = 374, bhi = 1101
2025-07-01 05:49:31.729
2025-07-01 05:49:31.738 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:31.746 g = []
2025-07-01 05:49:31.753 if alo < ahi:
2025-07-01 05:49:31.761 if blo < bhi:
2025-07-01 05:49:31.769 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:31.776 else:
2025-07-01 05:49:31.783 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:31.790 elif blo < bhi:
2025-07-01 05:49:31.797 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:31.808
2025-07-01 05:49:31.817 > yield from g
2025-07-01 05:49:31.824
2025-07-01 05:49:31.834 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:31.841 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:31.849
2025-07-01 05:49:31.861 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:31.869 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:31.877 alo = 374, ahi = 1101
2025-07-01 05:49:31.883 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:31.891 blo = 374, bhi = 1101
2025-07-01 05:49:31.901
2025-07-01 05:49:31.910 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:31.916 r"""
2025-07-01 05:49:31.922 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:31.929 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:31.936 synch point, and intraline difference marking is done on the
2025-07-01 05:49:31.943 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:31.949
2025-07-01 05:49:31.955 Example:
2025-07-01 05:49:31.961
2025-07-01 05:49:31.967 >>> d = Differ()
2025-07-01 05:49:31.973 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:31.979 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:31.985 >>> print(''.join(results), end="")
2025-07-01 05:49:31.991 - abcDefghiJkl
2025-07-01 05:49:32.004 + abcdefGhijkl
2025-07-01 05:49:32.021 """
2025-07-01 05:49:32.028
2025-07-01 05:49:32.035 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:32.042 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:32.050 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:32.057 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:32.063 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:32.069
2025-07-01 05:49:32.075 # search for the pair that matches best without being identical
2025-07-01 05:49:32.081 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:32.088 # on junk -- unless we have to)
2025-07-01 05:49:32.094 for j in range(blo, bhi):
2025-07-01 05:49:32.100 bj = b[j]
2025-07-01 05:49:32.107 cruncher.set_seq2(bj)
2025-07-01 05:49:32.114 for i in range(alo, ahi):
2025-07-01 05:49:32.122 ai = a[i]
2025-07-01 05:49:32.132 if ai == bj:
2025-07-01 05:49:32.140 if eqi is None:
2025-07-01 05:49:32.147 eqi, eqj = i, j
2025-07-01 05:49:32.152 continue
2025-07-01 05:49:32.158 cruncher.set_seq1(ai)
2025-07-01 05:49:32.164 # computing similarity is expensive, so use the quick
2025-07-01 05:49:32.171 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:32.183 # compares by a factor of 3.
2025-07-01 05:49:32.193 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:32.203 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:32.210 # of the computation is cached by cruncher
2025-07-01 05:49:32.217 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:32.224 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:32.231 cruncher.ratio() > best_ratio:
2025-07-01 05:49:32.239 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:32.250 if best_ratio < cutoff:
2025-07-01 05:49:32.258 # no non-identical "pretty close" pair
2025-07-01 05:49:32.264 if eqi is None:
2025-07-01 05:49:32.271 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:32.278 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:32.288 return
2025-07-01 05:49:32.298 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:32.305 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:32.312 else:
2025-07-01 05:49:32.319 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:32.327 eqi = None
2025-07-01 05:49:32.338
2025-07-01 05:49:32.348 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:32.355 # identical
2025-07-01 05:49:32.361
2025-07-01 05:49:32.367 # pump out diffs from before the synch point
2025-07-01 05:49:32.373 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:32.379
2025-07-01 05:49:32.387 # do intraline marking on the synch pair
2025-07-01 05:49:32.398 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:32.407 if eqi is None:
2025-07-01 05:49:32.413 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:32.418 atags = btags = ""
2025-07-01 05:49:32.424 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:32.430 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:32.437 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:32.444 if tag == 'replace':
2025-07-01 05:49:32.452 atags += '^' * la
2025-07-01 05:49:32.460 btags += '^' * lb
2025-07-01 05:49:32.468 elif tag == 'delete':
2025-07-01 05:49:32.473 atags += '-' * la
2025-07-01 05:49:32.479 elif tag == 'insert':
2025-07-01 05:49:32.484 btags += '+' * lb
2025-07-01 05:49:32.489 elif tag == 'equal':
2025-07-01 05:49:32.494 atags += ' ' * la
2025-07-01 05:49:32.500 btags += ' ' * lb
2025-07-01 05:49:32.506 else:
2025-07-01 05:49:32.512 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:32.520 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:32.529 else:
2025-07-01 05:49:32.537 # the synch pair is identical
2025-07-01 05:49:32.544 yield ' ' + aelt
2025-07-01 05:49:32.553
2025-07-01 05:49:32.560 # pump out diffs from after the synch point
2025-07-01 05:49:32.567 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:32.573
2025-07-01 05:49:32.578 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:32.586 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:32.594
2025-07-01 05:49:32.601 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:32.610 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:32.622 alo = 375, ahi = 1101
2025-07-01 05:49:32.629 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:32.635 blo = 375, bhi = 1101
2025-07-01 05:49:32.641
2025-07-01 05:49:32.648 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:32.655 g = []
2025-07-01 05:49:32.663 if alo < ahi:
2025-07-01 05:49:32.674 if blo < bhi:
2025-07-01 05:49:32.682 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:32.688 else:
2025-07-01 05:49:32.694 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:32.701 elif blo < bhi:
2025-07-01 05:49:32.713 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:32.722
2025-07-01 05:49:32.730 > yield from g
2025-07-01 05:49:32.737
2025-07-01 05:49:32.744 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:32.750 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:32.755
2025-07-01 05:49:32.762 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:32.768 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:32.774 alo = 375, ahi = 1101
2025-07-01 05:49:32.781 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:32.786 blo = 375, bhi = 1101
2025-07-01 05:49:32.792
2025-07-01 05:49:32.798 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:32.804 r"""
2025-07-01 05:49:32.810 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:32.815 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:32.821 synch point, and intraline difference marking is done on the
2025-07-01 05:49:32.827 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:32.833
2025-07-01 05:49:32.838 Example:
2025-07-01 05:49:32.844
2025-07-01 05:49:32.850 >>> d = Differ()
2025-07-01 05:49:32.856 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:32.862 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:32.868 >>> print(''.join(results), end="")
2025-07-01 05:49:32.873 - abcDefghiJkl
2025-07-01 05:49:32.887 + abcdefGhijkl
2025-07-01 05:49:32.905 """
2025-07-01 05:49:32.914
2025-07-01 05:49:32.920 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:32.926 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:32.931 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:32.936 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:32.941 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:32.946
2025-07-01 05:49:32.950 # search for the pair that matches best without being identical
2025-07-01 05:49:32.958 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:32.969 # on junk -- unless we have to)
2025-07-01 05:49:32.976 for j in range(blo, bhi):
2025-07-01 05:49:32.983 bj = b[j]
2025-07-01 05:49:32.988 cruncher.set_seq2(bj)
2025-07-01 05:49:32.993 for i in range(alo, ahi):
2025-07-01 05:49:32.998 ai = a[i]
2025-07-01 05:49:33.006 if ai == bj:
2025-07-01 05:49:33.014 if eqi is None:
2025-07-01 05:49:33.020 eqi, eqj = i, j
2025-07-01 05:49:33.026 continue
2025-07-01 05:49:33.034 cruncher.set_seq1(ai)
2025-07-01 05:49:33.042 # computing similarity is expensive, so use the quick
2025-07-01 05:49:33.054 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:33.062 # compares by a factor of 3.
2025-07-01 05:49:33.069 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:33.075 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:33.082 # of the computation is cached by cruncher
2025-07-01 05:49:33.092 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:33.102 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:33.109 cruncher.ratio() > best_ratio:
2025-07-01 05:49:33.116 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:33.121 if best_ratio < cutoff:
2025-07-01 05:49:33.126 # no non-identical "pretty close" pair
2025-07-01 05:49:33.130 if eqi is None:
2025-07-01 05:49:33.135 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:33.141 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:33.146 return
2025-07-01 05:49:33.152 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:33.158 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:33.164 else:
2025-07-01 05:49:33.170 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:33.178 eqi = None
2025-07-01 05:49:33.189
2025-07-01 05:49:33.197 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:33.203 # identical
2025-07-01 05:49:33.208
2025-07-01 05:49:33.213 # pump out diffs from before the synch point
2025-07-01 05:49:33.218 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:33.223
2025-07-01 05:49:33.228 # do intraline marking on the synch pair
2025-07-01 05:49:33.234 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:33.240 if eqi is None:
2025-07-01 05:49:33.246 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:33.253 atags = btags = ""
2025-07-01 05:49:33.259 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:33.265 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:33.271 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:33.277 if tag == 'replace':
2025-07-01 05:49:33.282 atags += '^' * la
2025-07-01 05:49:33.288 btags += '^' * lb
2025-07-01 05:49:33.294 elif tag == 'delete':
2025-07-01 05:49:33.299 atags += '-' * la
2025-07-01 05:49:33.311 elif tag == 'insert':
2025-07-01 05:49:33.321 btags += '+' * lb
2025-07-01 05:49:33.328 elif tag == 'equal':
2025-07-01 05:49:33.336 atags += ' ' * la
2025-07-01 05:49:33.343 btags += ' ' * lb
2025-07-01 05:49:33.348 else:
2025-07-01 05:49:33.353 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:33.358 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:33.363 else:
2025-07-01 05:49:33.367 # the synch pair is identical
2025-07-01 05:49:33.372 yield ' ' + aelt
2025-07-01 05:49:33.376
2025-07-01 05:49:33.388 # pump out diffs from after the synch point
2025-07-01 05:49:33.395 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:33.401
2025-07-01 05:49:33.407 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:33.418 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:33.425
2025-07-01 05:49:33.431 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:33.438 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:33.448 alo = 378, ahi = 1101
2025-07-01 05:49:33.460 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:33.473 blo = 378, bhi = 1101
2025-07-01 05:49:33.483
2025-07-01 05:49:33.493 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:33.500 g = []
2025-07-01 05:49:33.506 if alo < ahi:
2025-07-01 05:49:33.512 if blo < bhi:
2025-07-01 05:49:33.518 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:33.523 else:
2025-07-01 05:49:33.527 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:33.532 elif blo < bhi:
2025-07-01 05:49:33.536 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:33.541
2025-07-01 05:49:33.545 > yield from g
2025-07-01 05:49:33.550
2025-07-01 05:49:33.554 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:33.559 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:33.563
2025-07-01 05:49:33.568 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:33.573 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:33.578 alo = 378, ahi = 1101
2025-07-01 05:49:33.582 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:33.587 blo = 378, bhi = 1101
2025-07-01 05:49:33.591
2025-07-01 05:49:33.596 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:33.600 r"""
2025-07-01 05:49:33.605 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:33.609 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:33.614 synch point, and intraline difference marking is done on the
2025-07-01 05:49:33.618 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:33.622
2025-07-01 05:49:33.627 Example:
2025-07-01 05:49:33.641
2025-07-01 05:49:33.651 >>> d = Differ()
2025-07-01 05:49:33.662 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:33.673 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:33.684 >>> print(''.join(results), end="")
2025-07-01 05:49:33.697 - abcDefghiJkl
2025-07-01 05:49:33.721 + abcdefGhijkl
2025-07-01 05:49:33.742 """
2025-07-01 05:49:33.748
2025-07-01 05:49:33.753 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:33.758 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:33.766 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:33.771 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:33.782 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:33.791
2025-07-01 05:49:33.797 # search for the pair that matches best without being identical
2025-07-01 05:49:33.803 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:33.809 # on junk -- unless we have to)
2025-07-01 05:49:33.814 for j in range(blo, bhi):
2025-07-01 05:49:33.820 bj = b[j]
2025-07-01 05:49:33.826 cruncher.set_seq2(bj)
2025-07-01 05:49:33.832 for i in range(alo, ahi):
2025-07-01 05:49:33.837 ai = a[i]
2025-07-01 05:49:33.843 if ai == bj:
2025-07-01 05:49:33.849 if eqi is None:
2025-07-01 05:49:33.854 eqi, eqj = i, j
2025-07-01 05:49:33.864 continue
2025-07-01 05:49:33.875 cruncher.set_seq1(ai)
2025-07-01 05:49:33.883 # computing similarity is expensive, so use the quick
2025-07-01 05:49:33.889 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:33.895 # compares by a factor of 3.
2025-07-01 05:49:33.903 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:33.914 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:33.923 # of the computation is cached by cruncher
2025-07-01 05:49:33.931 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:33.940 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:33.948 cruncher.ratio() > best_ratio:
2025-07-01 05:49:33.960 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:33.973 if best_ratio < cutoff:
2025-07-01 05:49:33.986 # no non-identical "pretty close" pair
2025-07-01 05:49:33.997 if eqi is None:
2025-07-01 05:49:34.007 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:34.014 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:34.020 return
2025-07-01 05:49:34.026 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:34.031 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:34.035 else:
2025-07-01 05:49:34.040 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:34.044 eqi = None
2025-07-01 05:49:34.049
2025-07-01 05:49:34.053 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:34.058 # identical
2025-07-01 05:49:34.062
2025-07-01 05:49:34.067 # pump out diffs from before the synch point
2025-07-01 05:49:34.071 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:34.076
2025-07-01 05:49:34.080 # do intraline marking on the synch pair
2025-07-01 05:49:34.086 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:34.091 if eqi is None:
2025-07-01 05:49:34.098 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:34.104 atags = btags = ""
2025-07-01 05:49:34.110 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:34.116 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:34.121 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:34.126 if tag == 'replace':
2025-07-01 05:49:34.132 atags += '^' * la
2025-07-01 05:49:34.137 btags += '^' * lb
2025-07-01 05:49:34.142 elif tag == 'delete':
2025-07-01 05:49:34.147 atags += '-' * la
2025-07-01 05:49:34.154 elif tag == 'insert':
2025-07-01 05:49:34.164 btags += '+' * lb
2025-07-01 05:49:34.172 elif tag == 'equal':
2025-07-01 05:49:34.179 atags += ' ' * la
2025-07-01 05:49:34.186 btags += ' ' * lb
2025-07-01 05:49:34.198 else:
2025-07-01 05:49:34.206 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:34.218 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:34.225 else:
2025-07-01 05:49:34.230 # the synch pair is identical
2025-07-01 05:49:34.240 yield ' ' + aelt
2025-07-01 05:49:34.249
2025-07-01 05:49:34.256 # pump out diffs from after the synch point
2025-07-01 05:49:34.264 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:34.270
2025-07-01 05:49:34.280 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:34.292 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:34.302
2025-07-01 05:49:34.310 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:34.319 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:34.325 alo = 379, ahi = 1101
2025-07-01 05:49:34.337 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:34.345 blo = 379, bhi = 1101
2025-07-01 05:49:34.352
2025-07-01 05:49:34.364 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:34.376 g = []
2025-07-01 05:49:34.385 if alo < ahi:
2025-07-01 05:49:34.391 if blo < bhi:
2025-07-01 05:49:34.396 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:34.401 else:
2025-07-01 05:49:34.406 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:34.410 elif blo < bhi:
2025-07-01 05:49:34.414 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:34.422
2025-07-01 05:49:34.428 > yield from g
2025-07-01 05:49:34.434
2025-07-01 05:49:34.440 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:34.446 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:34.451
2025-07-01 05:49:34.456 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:34.461 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:34.466 alo = 379, ahi = 1101
2025-07-01 05:49:34.471 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:34.476 blo = 379, bhi = 1101
2025-07-01 05:49:34.480
2025-07-01 05:49:34.485 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:34.491 r"""
2025-07-01 05:49:34.497 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:34.504 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:34.511 synch point, and intraline difference marking is done on the
2025-07-01 05:49:34.517 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:34.523
2025-07-01 05:49:34.529 Example:
2025-07-01 05:49:34.536
2025-07-01 05:49:34.544 >>> d = Differ()
2025-07-01 05:49:34.550 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:34.557 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:34.565 >>> print(''.join(results), end="")
2025-07-01 05:49:34.573 - abcDefghiJkl
2025-07-01 05:49:34.590 + abcdefGhijkl
2025-07-01 05:49:34.615 """
2025-07-01 05:49:34.624
2025-07-01 05:49:34.633 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:34.645 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:34.656 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:34.668 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:34.679 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:34.691
2025-07-01 05:49:34.700 # search for the pair that matches best without being identical
2025-07-01 05:49:34.708 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:34.715 # on junk -- unless we have to)
2025-07-01 05:49:34.722 for j in range(blo, bhi):
2025-07-01 05:49:34.729 bj = b[j]
2025-07-01 05:49:34.736 cruncher.set_seq2(bj)
2025-07-01 05:49:34.742 for i in range(alo, ahi):
2025-07-01 05:49:34.748 ai = a[i]
2025-07-01 05:49:34.754 if ai == bj:
2025-07-01 05:49:34.760 if eqi is None:
2025-07-01 05:49:34.766 eqi, eqj = i, j
2025-07-01 05:49:34.771 continue
2025-07-01 05:49:34.777 cruncher.set_seq1(ai)
2025-07-01 05:49:34.784 # computing similarity is expensive, so use the quick
2025-07-01 05:49:34.791 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:34.797 # compares by a factor of 3.
2025-07-01 05:49:34.808 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:34.817 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:34.825 # of the computation is cached by cruncher
2025-07-01 05:49:34.832 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:34.838 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:34.844 cruncher.ratio() > best_ratio:
2025-07-01 05:49:34.851 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:34.859 if best_ratio < cutoff:
2025-07-01 05:49:34.869 # no non-identical "pretty close" pair
2025-07-01 05:49:34.877 if eqi is None:
2025-07-01 05:49:34.884 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:34.890 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:34.896 return
2025-07-01 05:49:34.901 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:34.906 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:34.910 else:
2025-07-01 05:49:34.916 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:34.921 eqi = None
2025-07-01 05:49:34.927
2025-07-01 05:49:34.933 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:34.939 # identical
2025-07-01 05:49:34.944
2025-07-01 05:49:34.950 # pump out diffs from before the synch point
2025-07-01 05:49:34.955 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:34.961
2025-07-01 05:49:34.967 # do intraline marking on the synch pair
2025-07-01 05:49:34.974 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:34.986 if eqi is None:
2025-07-01 05:49:34.996 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:35.004 atags = btags = ""
2025-07-01 05:49:35.010 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:35.016 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:35.022 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:35.028 if tag == 'replace':
2025-07-01 05:49:35.034 atags += '^' * la
2025-07-01 05:49:35.043 btags += '^' * lb
2025-07-01 05:49:35.051 elif tag == 'delete':
2025-07-01 05:49:35.059 atags += '-' * la
2025-07-01 05:49:35.065 elif tag == 'insert':
2025-07-01 05:49:35.072 btags += '+' * lb
2025-07-01 05:49:35.079 elif tag == 'equal':
2025-07-01 05:49:35.087 atags += ' ' * la
2025-07-01 05:49:35.094 btags += ' ' * lb
2025-07-01 05:49:35.102 else:
2025-07-01 05:49:35.114 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:35.126 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:35.137 else:
2025-07-01 05:49:35.146 # the synch pair is identical
2025-07-01 05:49:35.152 yield ' ' + aelt
2025-07-01 05:49:35.158
2025-07-01 05:49:35.166 # pump out diffs from after the synch point
2025-07-01 05:49:35.174 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:35.180
2025-07-01 05:49:35.186 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:35.191 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:35.197
2025-07-01 05:49:35.205 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:35.215 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:35.221 alo = 380, ahi = 1101
2025-07-01 05:49:35.229 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:35.234 blo = 380, bhi = 1101
2025-07-01 05:49:35.244
2025-07-01 05:49:35.255 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:35.263 g = []
2025-07-01 05:49:35.270 if alo < ahi:
2025-07-01 05:49:35.281 if blo < bhi:
2025-07-01 05:49:35.293 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:35.303 else:
2025-07-01 05:49:35.315 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:35.327 elif blo < bhi:
2025-07-01 05:49:35.338 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:35.348
2025-07-01 05:49:35.358 > yield from g
2025-07-01 05:49:35.369
2025-07-01 05:49:35.382 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:35.395 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:35.405
2025-07-01 05:49:35.417 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:35.428 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:35.436 alo = 380, ahi = 1101
2025-07-01 05:49:35.449 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:35.460 blo = 380, bhi = 1101
2025-07-01 05:49:35.474
2025-07-01 05:49:35.484 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:35.493 r"""
2025-07-01 05:49:35.500 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:35.506 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:35.512 synch point, and intraline difference marking is done on the
2025-07-01 05:49:35.518 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:35.523
2025-07-01 05:49:35.529 Example:
2025-07-01 05:49:35.534
2025-07-01 05:49:35.538 >>> d = Differ()
2025-07-01 05:49:35.544 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:35.553 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:35.559 >>> print(''.join(results), end="")
2025-07-01 05:49:35.566 - abcDefghiJkl
2025-07-01 05:49:35.581 + abcdefGhijkl
2025-07-01 05:49:35.596 """
2025-07-01 05:49:35.607
2025-07-01 05:49:35.614 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:35.620 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:35.627 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:35.635 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:35.646 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:35.655
2025-07-01 05:49:35.663 # search for the pair that matches best without being identical
2025-07-01 05:49:35.671 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:35.683 # on junk -- unless we have to)
2025-07-01 05:49:35.691 for j in range(blo, bhi):
2025-07-01 05:49:35.703 bj = b[j]
2025-07-01 05:49:35.713 cruncher.set_seq2(bj)
2025-07-01 05:49:35.724 for i in range(alo, ahi):
2025-07-01 05:49:35.735 ai = a[i]
2025-07-01 05:49:35.744 if ai == bj:
2025-07-01 05:49:35.752 if eqi is None:
2025-07-01 05:49:35.759 eqi, eqj = i, j
2025-07-01 05:49:35.765 continue
2025-07-01 05:49:35.771 cruncher.set_seq1(ai)
2025-07-01 05:49:35.779 # computing similarity is expensive, so use the quick
2025-07-01 05:49:35.790 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:35.801 # compares by a factor of 3.
2025-07-01 05:49:35.812 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:35.823 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:35.834 # of the computation is cached by cruncher
2025-07-01 05:49:35.848 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:35.859 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:35.868 cruncher.ratio() > best_ratio:
2025-07-01 05:49:35.875 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:35.884 if best_ratio < cutoff:
2025-07-01 05:49:35.890 # no non-identical "pretty close" pair
2025-07-01 05:49:35.896 if eqi is None:
2025-07-01 05:49:35.903 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:35.909 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:35.916 return
2025-07-01 05:49:35.923 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:35.930 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:35.936 else:
2025-07-01 05:49:35.943 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:35.951 eqi = None
2025-07-01 05:49:35.960
2025-07-01 05:49:35.969 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:35.981 # identical
2025-07-01 05:49:35.991
2025-07-01 05:49:36.002 # pump out diffs from before the synch point
2025-07-01 05:49:36.011 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:36.018
2025-07-01 05:49:36.029 # do intraline marking on the synch pair
2025-07-01 05:49:36.039 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:36.046 if eqi is None:
2025-07-01 05:49:36.056 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:36.065 atags = btags = ""
2025-07-01 05:49:36.072 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:36.082 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:36.093 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:36.100 if tag == 'replace':
2025-07-01 05:49:36.107 atags += '^' * la
2025-07-01 05:49:36.113 btags += '^' * lb
2025-07-01 05:49:36.119 elif tag == 'delete':
2025-07-01 05:49:36.127 atags += '-' * la
2025-07-01 05:49:36.138 elif tag == 'insert':
2025-07-01 05:49:36.145 btags += '+' * lb
2025-07-01 05:49:36.152 elif tag == 'equal':
2025-07-01 05:49:36.157 atags += ' ' * la
2025-07-01 05:49:36.163 btags += ' ' * lb
2025-07-01 05:49:36.168 else:
2025-07-01 05:49:36.174 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:36.180 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:36.186 else:
2025-07-01 05:49:36.192 # the synch pair is identical
2025-07-01 05:49:36.199 yield ' ' + aelt
2025-07-01 05:49:36.206
2025-07-01 05:49:36.216 # pump out diffs from after the synch point
2025-07-01 05:49:36.226 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:36.233
2025-07-01 05:49:36.239 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:36.246 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:36.252
2025-07-01 05:49:36.259 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:36.268 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:36.275 alo = 381, ahi = 1101
2025-07-01 05:49:36.282 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:36.289 blo = 381, bhi = 1101
2025-07-01 05:49:36.296
2025-07-01 05:49:36.303 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:36.311 g = []
2025-07-01 05:49:36.321 if alo < ahi:
2025-07-01 05:49:36.329 if blo < bhi:
2025-07-01 05:49:36.336 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:36.342 else:
2025-07-01 05:49:36.351 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:36.358 elif blo < bhi:
2025-07-01 05:49:36.363 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:36.368
2025-07-01 05:49:36.372 > yield from g
2025-07-01 05:49:36.379
2025-07-01 05:49:36.389 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:36.398 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:36.407
2025-07-01 05:49:36.417 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:36.426 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:36.435 alo = 381, ahi = 1101
2025-07-01 05:49:36.443 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:36.451 blo = 381, bhi = 1101
2025-07-01 05:49:36.462
2025-07-01 05:49:36.473 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:36.482 r"""
2025-07-01 05:49:36.490 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:36.497 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:36.503 synch point, and intraline difference marking is done on the
2025-07-01 05:49:36.513 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:36.520
2025-07-01 05:49:36.528 Example:
2025-07-01 05:49:36.535
2025-07-01 05:49:36.541 >>> d = Differ()
2025-07-01 05:49:36.546 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:36.552 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:36.557 >>> print(''.join(results), end="")
2025-07-01 05:49:36.562 - abcDefghiJkl
2025-07-01 05:49:36.574 + abcdefGhijkl
2025-07-01 05:49:36.590 """
2025-07-01 05:49:36.598
2025-07-01 05:49:36.608 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:36.617 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:36.624 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:36.630 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:36.636 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:36.643
2025-07-01 05:49:36.650 # search for the pair that matches best without being identical
2025-07-01 05:49:36.659 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:36.665 # on junk -- unless we have to)
2025-07-01 05:49:36.675 for j in range(blo, bhi):
2025-07-01 05:49:36.683 bj = b[j]
2025-07-01 05:49:36.691 cruncher.set_seq2(bj)
2025-07-01 05:49:36.699 for i in range(alo, ahi):
2025-07-01 05:49:36.709 ai = a[i]
2025-07-01 05:49:36.717 if ai == bj:
2025-07-01 05:49:36.723 if eqi is None:
2025-07-01 05:49:36.729 eqi, eqj = i, j
2025-07-01 05:49:36.734 continue
2025-07-01 05:49:36.744 cruncher.set_seq1(ai)
2025-07-01 05:49:36.750 # computing similarity is expensive, so use the quick
2025-07-01 05:49:36.757 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:36.765 # compares by a factor of 3.
2025-07-01 05:49:36.775 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:36.782 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:36.788 # of the computation is cached by cruncher
2025-07-01 05:49:36.795 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:36.803 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:36.814 cruncher.ratio() > best_ratio:
2025-07-01 05:49:36.822 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:36.829 if best_ratio < cutoff:
2025-07-01 05:49:36.834 # no non-identical "pretty close" pair
2025-07-01 05:49:36.840 if eqi is None:
2025-07-01 05:49:36.845 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:36.850 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:36.854 return
2025-07-01 05:49:36.859 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:36.864 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:36.869 else:
2025-07-01 05:49:36.874 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:36.888 eqi = None
2025-07-01 05:49:36.895
2025-07-01 05:49:36.904 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:36.912 # identical
2025-07-01 05:49:36.917
2025-07-01 05:49:36.923 # pump out diffs from before the synch point
2025-07-01 05:49:36.928 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:36.932
2025-07-01 05:49:36.941 # do intraline marking on the synch pair
2025-07-01 05:49:36.949 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:36.955 if eqi is None:
2025-07-01 05:49:36.962 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:36.972 atags = btags = ""
2025-07-01 05:49:36.983 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:36.991 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:36.997 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:37.003 if tag == 'replace':
2025-07-01 05:49:37.009 atags += '^' * la
2025-07-01 05:49:37.015 btags += '^' * lb
2025-07-01 05:49:37.023 elif tag == 'delete':
2025-07-01 05:49:37.034 atags += '-' * la
2025-07-01 05:49:37.042 elif tag == 'insert':
2025-07-01 05:49:37.048 btags += '+' * lb
2025-07-01 05:49:37.054 elif tag == 'equal':
2025-07-01 05:49:37.058 atags += ' ' * la
2025-07-01 05:49:37.062 btags += ' ' * lb
2025-07-01 05:49:37.067 else:
2025-07-01 05:49:37.076 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:37.084 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:37.090 else:
2025-07-01 05:49:37.097 # the synch pair is identical
2025-07-01 05:49:37.104 yield ' ' + aelt
2025-07-01 05:49:37.110
2025-07-01 05:49:37.119 # pump out diffs from after the synch point
2025-07-01 05:49:37.130 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:37.138
2025-07-01 05:49:37.145 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:37.150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:37.155
2025-07-01 05:49:37.160 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:37.165 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:37.170 alo = 382, ahi = 1101
2025-07-01 05:49:37.175 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:37.180 blo = 382, bhi = 1101
2025-07-01 05:49:37.184
2025-07-01 05:49:37.189 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:37.195 g = []
2025-07-01 05:49:37.200 if alo < ahi:
2025-07-01 05:49:37.207 if blo < bhi:
2025-07-01 05:49:37.214 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:37.225 else:
2025-07-01 05:49:37.234 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:37.241 elif blo < bhi:
2025-07-01 05:49:37.253 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:37.261
2025-07-01 05:49:37.267 > yield from g
2025-07-01 05:49:37.273
2025-07-01 05:49:37.278 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:37.283 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:37.287
2025-07-01 05:49:37.292 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:37.297 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:37.302 alo = 382, ahi = 1101
2025-07-01 05:49:37.307 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:37.311 blo = 382, bhi = 1101
2025-07-01 05:49:37.316
2025-07-01 05:49:37.320 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:37.325 r"""
2025-07-01 05:49:37.329 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:37.334 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:37.339 synch point, and intraline difference marking is done on the
2025-07-01 05:49:37.344 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:37.348
2025-07-01 05:49:37.353 Example:
2025-07-01 05:49:37.357
2025-07-01 05:49:37.363 >>> d = Differ()
2025-07-01 05:49:37.369 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:37.374 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:37.382 >>> print(''.join(results), end="")
2025-07-01 05:49:37.389 - abcDefghiJkl
2025-07-01 05:49:37.401 + abcdefGhijkl
2025-07-01 05:49:37.412 """
2025-07-01 05:49:37.419
2025-07-01 05:49:37.425 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:37.431 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:37.438 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:37.445 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:37.452 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:37.458
2025-07-01 05:49:37.467 # search for the pair that matches best without being identical
2025-07-01 05:49:37.477 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:37.485 # on junk -- unless we have to)
2025-07-01 05:49:37.491 for j in range(blo, bhi):
2025-07-01 05:49:37.498 bj = b[j]
2025-07-01 05:49:37.505 cruncher.set_seq2(bj)
2025-07-01 05:49:37.513 for i in range(alo, ahi):
2025-07-01 05:49:37.520 ai = a[i]
2025-07-01 05:49:37.527 if ai == bj:
2025-07-01 05:49:37.533 if eqi is None:
2025-07-01 05:49:37.541 eqi, eqj = i, j
2025-07-01 05:49:37.551 continue
2025-07-01 05:49:37.560 cruncher.set_seq1(ai)
2025-07-01 05:49:37.572 # computing similarity is expensive, so use the quick
2025-07-01 05:49:37.583 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:37.595 # compares by a factor of 3.
2025-07-01 05:49:37.607 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:37.617 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:37.629 # of the computation is cached by cruncher
2025-07-01 05:49:37.637 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:37.644 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:37.655 cruncher.ratio() > best_ratio:
2025-07-01 05:49:37.668 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:37.680 if best_ratio < cutoff:
2025-07-01 05:49:37.689 # no non-identical "pretty close" pair
2025-07-01 05:49:37.697 if eqi is None:
2025-07-01 05:49:37.704 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:37.711 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:37.720 return
2025-07-01 05:49:37.733 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:37.743 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:37.752 else:
2025-07-01 05:49:37.764 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:37.776 eqi = None
2025-07-01 05:49:37.787
2025-07-01 05:49:37.799 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:37.808 # identical
2025-07-01 05:49:37.816
2025-07-01 05:49:37.823 # pump out diffs from before the synch point
2025-07-01 05:49:37.829 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:37.835
2025-07-01 05:49:37.847 # do intraline marking on the synch pair
2025-07-01 05:49:37.855 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:37.863 if eqi is None:
2025-07-01 05:49:37.872 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:37.882 atags = btags = ""
2025-07-01 05:49:37.894 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:37.903 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:37.912 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:37.919 if tag == 'replace':
2025-07-01 05:49:37.926 atags += '^' * la
2025-07-01 05:49:37.936 btags += '^' * lb
2025-07-01 05:49:37.946 elif tag == 'delete':
2025-07-01 05:49:37.956 atags += '-' * la
2025-07-01 05:49:37.967 elif tag == 'insert':
2025-07-01 05:49:37.978 btags += '+' * lb
2025-07-01 05:49:37.988 elif tag == 'equal':
2025-07-01 05:49:37.996 atags += ' ' * la
2025-07-01 05:49:38.008 btags += ' ' * lb
2025-07-01 05:49:38.019 else:
2025-07-01 05:49:38.031 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:38.041 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:38.049 else:
2025-07-01 05:49:38.056 # the synch pair is identical
2025-07-01 05:49:38.063 yield ' ' + aelt
2025-07-01 05:49:38.071
2025-07-01 05:49:38.081 # pump out diffs from after the synch point
2025-07-01 05:49:38.090 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:38.097
2025-07-01 05:49:38.104 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:38.109 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:38.115
2025-07-01 05:49:38.121 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:38.128 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:38.140 alo = 383, ahi = 1101
2025-07-01 05:49:38.152 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:38.160 blo = 383, bhi = 1101
2025-07-01 05:49:38.167
2025-07-01 05:49:38.174 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:38.180 g = []
2025-07-01 05:49:38.186 if alo < ahi:
2025-07-01 05:49:38.196 if blo < bhi:
2025-07-01 05:49:38.205 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:38.212 else:
2025-07-01 05:49:38.218 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:38.224 elif blo < bhi:
2025-07-01 05:49:38.230 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:38.241
2025-07-01 05:49:38.250 > yield from g
2025-07-01 05:49:38.258
2025-07-01 05:49:38.265 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:38.271 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:38.277
2025-07-01 05:49:38.282 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:38.296 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:38.307 alo = 383, ahi = 1101
2025-07-01 05:49:38.319 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:38.328 blo = 383, bhi = 1101
2025-07-01 05:49:38.336
2025-07-01 05:49:38.343 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:38.349 r"""
2025-07-01 05:49:38.355 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:38.362 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:38.373 synch point, and intraline difference marking is done on the
2025-07-01 05:49:38.385 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:38.394
2025-07-01 05:49:38.402 Example:
2025-07-01 05:49:38.411
2025-07-01 05:49:38.419 >>> d = Differ()
2025-07-01 05:49:38.426 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:38.432 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:38.438 >>> print(''.join(results), end="")
2025-07-01 05:49:38.449 - abcDefghiJkl
2025-07-01 05:49:38.467 + abcdefGhijkl
2025-07-01 05:49:38.485 """
2025-07-01 05:49:38.493
2025-07-01 05:49:38.499 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:38.506 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:38.511 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:38.517 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:38.522 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:38.526
2025-07-01 05:49:38.532 # search for the pair that matches best without being identical
2025-07-01 05:49:38.538 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:38.543 # on junk -- unless we have to)
2025-07-01 05:49:38.550 for j in range(blo, bhi):
2025-07-01 05:49:38.558 bj = b[j]
2025-07-01 05:49:38.564 cruncher.set_seq2(bj)
2025-07-01 05:49:38.571 for i in range(alo, ahi):
2025-07-01 05:49:38.577 ai = a[i]
2025-07-01 05:49:38.588 if ai == bj:
2025-07-01 05:49:38.599 if eqi is None:
2025-07-01 05:49:38.611 eqi, eqj = i, j
2025-07-01 05:49:38.621 continue
2025-07-01 05:49:38.632 cruncher.set_seq1(ai)
2025-07-01 05:49:38.643 # computing similarity is expensive, so use the quick
2025-07-01 05:49:38.652 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:38.661 # compares by a factor of 3.
2025-07-01 05:49:38.667 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:38.673 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:38.683 # of the computation is cached by cruncher
2025-07-01 05:49:38.695 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:38.706 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:38.715 cruncher.ratio() > best_ratio:
2025-07-01 05:49:38.723 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:38.735 if best_ratio < cutoff:
2025-07-01 05:49:38.745 # no non-identical "pretty close" pair
2025-07-01 05:49:38.757 if eqi is None:
2025-07-01 05:49:38.768 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:38.781 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:38.790 return
2025-07-01 05:49:38.798 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:38.809 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:38.818 else:
2025-07-01 05:49:38.827 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:38.835 eqi = None
2025-07-01 05:49:38.843
2025-07-01 05:49:38.850 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:38.860 # identical
2025-07-01 05:49:38.869
2025-07-01 05:49:38.881 # pump out diffs from before the synch point
2025-07-01 05:49:38.891 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:38.899
2025-07-01 05:49:38.907 # do intraline marking on the synch pair
2025-07-01 05:49:38.914 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:38.919 if eqi is None:
2025-07-01 05:49:38.924 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:38.929 atags = btags = ""
2025-07-01 05:49:38.935 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:38.942 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:38.950 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:38.958 if tag == 'replace':
2025-07-01 05:49:38.964 atags += '^' * la
2025-07-01 05:49:38.970 btags += '^' * lb
2025-07-01 05:49:38.976 elif tag == 'delete':
2025-07-01 05:49:38.982 atags += '-' * la
2025-07-01 05:49:38.987 elif tag == 'insert':
2025-07-01 05:49:38.993 btags += '+' * lb
2025-07-01 05:49:38.998 elif tag == 'equal':
2025-07-01 05:49:39.003 atags += ' ' * la
2025-07-01 05:49:39.008 btags += ' ' * lb
2025-07-01 05:49:39.012 else:
2025-07-01 05:49:39.017 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:39.023 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:39.028 else:
2025-07-01 05:49:39.034 # the synch pair is identical
2025-07-01 05:49:39.043 yield ' ' + aelt
2025-07-01 05:49:39.052
2025-07-01 05:49:39.060 # pump out diffs from after the synch point
2025-07-01 05:49:39.067 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:39.075
2025-07-01 05:49:39.086 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:39.095 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:39.102
2025-07-01 05:49:39.113 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:39.122 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:39.131 alo = 384, ahi = 1101
2025-07-01 05:49:39.140 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:39.148 blo = 384, bhi = 1101
2025-07-01 05:49:39.155
2025-07-01 05:49:39.162 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:39.173 g = []
2025-07-01 05:49:39.178 if alo < ahi:
2025-07-01 05:49:39.185 if blo < bhi:
2025-07-01 05:49:39.191 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:39.197 else:
2025-07-01 05:49:39.202 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:39.207 elif blo < bhi:
2025-07-01 05:49:39.211 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:39.216
2025-07-01 05:49:39.221 > yield from g
2025-07-01 05:49:39.226
2025-07-01 05:49:39.231 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:39.236 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:39.241
2025-07-01 05:49:39.246 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:39.251 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:39.255 alo = 384, ahi = 1101
2025-07-01 05:49:39.263 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:39.269 blo = 384, bhi = 1101
2025-07-01 05:49:39.275
2025-07-01 05:49:39.282 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:39.291 r"""
2025-07-01 05:49:39.298 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:39.304 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:39.312 synch point, and intraline difference marking is done on the
2025-07-01 05:49:39.322 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:39.334
2025-07-01 05:49:39.347 Example:
2025-07-01 05:49:39.357
2025-07-01 05:49:39.369 >>> d = Differ()
2025-07-01 05:49:39.377 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:39.383 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:39.388 >>> print(''.join(results), end="")
2025-07-01 05:49:39.394 - abcDefghiJkl
2025-07-01 05:49:39.409 + abcdefGhijkl
2025-07-01 05:49:39.425 """
2025-07-01 05:49:39.434
2025-07-01 05:49:39.442 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:39.448 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:39.455 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:39.462 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:39.472 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:39.483
2025-07-01 05:49:39.491 # search for the pair that matches best without being identical
2025-07-01 05:49:39.499 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:39.507 # on junk -- unless we have to)
2025-07-01 05:49:39.513 for j in range(blo, bhi):
2025-07-01 05:49:39.520 bj = b[j]
2025-07-01 05:49:39.527 cruncher.set_seq2(bj)
2025-07-01 05:49:39.535 for i in range(alo, ahi):
2025-07-01 05:49:39.541 ai = a[i]
2025-07-01 05:49:39.550 if ai == bj:
2025-07-01 05:49:39.557 if eqi is None:
2025-07-01 05:49:39.563 eqi, eqj = i, j
2025-07-01 05:49:39.570 continue
2025-07-01 05:49:39.575 cruncher.set_seq1(ai)
2025-07-01 05:49:39.581 # computing similarity is expensive, so use the quick
2025-07-01 05:49:39.587 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:39.593 # compares by a factor of 3.
2025-07-01 05:49:39.602 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:39.609 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:39.617 # of the computation is cached by cruncher
2025-07-01 05:49:39.625 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:39.632 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:39.638 cruncher.ratio() > best_ratio:
2025-07-01 05:49:39.645 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:39.650 if best_ratio < cutoff:
2025-07-01 05:49:39.656 # no non-identical "pretty close" pair
2025-07-01 05:49:39.661 if eqi is None:
2025-07-01 05:49:39.667 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:39.673 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:39.679 return
2025-07-01 05:49:39.691 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:39.700 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:39.708 else:
2025-07-01 05:49:39.716 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:39.724 eqi = None
2025-07-01 05:49:39.730
2025-07-01 05:49:39.740 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:39.747 # identical
2025-07-01 05:49:39.755
2025-07-01 05:49:39.765 # pump out diffs from before the synch point
2025-07-01 05:49:39.777 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:39.788
2025-07-01 05:49:39.796 # do intraline marking on the synch pair
2025-07-01 05:49:39.803 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:39.811 if eqi is None:
2025-07-01 05:49:39.822 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:39.831 atags = btags = ""
2025-07-01 05:49:39.838 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:39.846 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:39.852 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:39.864 if tag == 'replace':
2025-07-01 05:49:39.877 atags += '^' * la
2025-07-01 05:49:39.887 btags += '^' * lb
2025-07-01 05:49:39.895 elif tag == 'delete':
2025-07-01 05:49:39.903 atags += '-' * la
2025-07-01 05:49:39.911 elif tag == 'insert':
2025-07-01 05:49:39.921 btags += '+' * lb
2025-07-01 05:49:39.929 elif tag == 'equal':
2025-07-01 05:49:39.937 atags += ' ' * la
2025-07-01 05:49:39.943 btags += ' ' * lb
2025-07-01 05:49:39.949 else:
2025-07-01 05:49:39.955 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:39.970 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:39.979 else:
2025-07-01 05:49:39.985 # the synch pair is identical
2025-07-01 05:49:39.990 yield ' ' + aelt
2025-07-01 05:49:39.995
2025-07-01 05:49:40.000 # pump out diffs from after the synch point
2025-07-01 05:49:40.007 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:40.016
2025-07-01 05:49:40.023 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:40.029 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:40.035
2025-07-01 05:49:40.045 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:40.055 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:40.062 alo = 385, ahi = 1101
2025-07-01 05:49:40.070 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:40.079 blo = 385, bhi = 1101
2025-07-01 05:49:40.092
2025-07-01 05:49:40.103 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:40.112 g = []
2025-07-01 05:49:40.120 if alo < ahi:
2025-07-01 05:49:40.127 if blo < bhi:
2025-07-01 05:49:40.133 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:40.138 else:
2025-07-01 05:49:40.150 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:40.161 elif blo < bhi:
2025-07-01 05:49:40.173 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:40.183
2025-07-01 05:49:40.194 > yield from g
2025-07-01 05:49:40.202
2025-07-01 05:49:40.208 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:40.213 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:40.218
2025-07-01 05:49:40.223 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:40.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:40.233 alo = 385, ahi = 1101
2025-07-01 05:49:40.239 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:40.244 blo = 385, bhi = 1101
2025-07-01 05:49:40.250
2025-07-01 05:49:40.261 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:40.268 r"""
2025-07-01 05:49:40.275 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:40.282 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:40.292 synch point, and intraline difference marking is done on the
2025-07-01 05:49:40.301 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:40.308
2025-07-01 05:49:40.315 Example:
2025-07-01 05:49:40.321
2025-07-01 05:49:40.327 >>> d = Differ()
2025-07-01 05:49:40.332 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:40.338 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:40.344 >>> print(''.join(results), end="")
2025-07-01 05:49:40.350 - abcDefghiJkl
2025-07-01 05:49:40.366 + abcdefGhijkl
2025-07-01 05:49:40.376 """
2025-07-01 05:49:40.381
2025-07-01 05:49:40.388 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:40.400 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:40.410 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:40.416 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:40.423 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:40.434
2025-07-01 05:49:40.445 # search for the pair that matches best without being identical
2025-07-01 05:49:40.457 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:40.468 # on junk -- unless we have to)
2025-07-01 05:49:40.479 for j in range(blo, bhi):
2025-07-01 05:49:40.492 bj = b[j]
2025-07-01 05:49:40.503 cruncher.set_seq2(bj)
2025-07-01 05:49:40.514 for i in range(alo, ahi):
2025-07-01 05:49:40.525 ai = a[i]
2025-07-01 05:49:40.536 if ai == bj:
2025-07-01 05:49:40.545 if eqi is None:
2025-07-01 05:49:40.552 eqi, eqj = i, j
2025-07-01 05:49:40.561 continue
2025-07-01 05:49:40.574 cruncher.set_seq1(ai)
2025-07-01 05:49:40.586 # computing similarity is expensive, so use the quick
2025-07-01 05:49:40.596 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:40.604 # compares by a factor of 3.
2025-07-01 05:49:40.611 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:40.618 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:40.624 # of the computation is cached by cruncher
2025-07-01 05:49:40.631 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:40.645 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:40.653 cruncher.ratio() > best_ratio:
2025-07-01 05:49:40.661 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:40.672 if best_ratio < cutoff:
2025-07-01 05:49:40.683 # no non-identical "pretty close" pair
2025-07-01 05:49:40.695 if eqi is None:
2025-07-01 05:49:40.705 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:40.716 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:40.725 return
2025-07-01 05:49:40.737 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:40.746 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:40.757 else:
2025-07-01 05:49:40.767 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:40.775 eqi = None
2025-07-01 05:49:40.782
2025-07-01 05:49:40.791 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:40.801 # identical
2025-07-01 05:49:40.809
2025-07-01 05:49:40.815 # pump out diffs from before the synch point
2025-07-01 05:49:40.821 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:40.830
2025-07-01 05:49:40.838 # do intraline marking on the synch pair
2025-07-01 05:49:40.845 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:40.858 if eqi is None:
2025-07-01 05:49:40.866 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:40.874 atags = btags = ""
2025-07-01 05:49:40.886 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:40.896 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:40.902 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:40.909 if tag == 'replace':
2025-07-01 05:49:40.915 atags += '^' * la
2025-07-01 05:49:40.923 btags += '^' * lb
2025-07-01 05:49:40.932 elif tag == 'delete':
2025-07-01 05:49:40.940 atags += '-' * la
2025-07-01 05:49:40.946 elif tag == 'insert':
2025-07-01 05:49:40.952 btags += '+' * lb
2025-07-01 05:49:40.959 elif tag == 'equal':
2025-07-01 05:49:40.970 atags += ' ' * la
2025-07-01 05:49:40.979 btags += ' ' * lb
2025-07-01 05:49:40.985 else:
2025-07-01 05:49:40.995 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:41.004 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:41.013 else:
2025-07-01 05:49:41.020 # the synch pair is identical
2025-07-01 05:49:41.029 yield ' ' + aelt
2025-07-01 05:49:41.037
2025-07-01 05:49:41.045 # pump out diffs from after the synch point
2025-07-01 05:49:41.051 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:41.059
2025-07-01 05:49:41.071 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:41.079 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:41.085
2025-07-01 05:49:41.091 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:41.104 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:41.112 alo = 386, ahi = 1101
2025-07-01 05:49:41.120 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:41.126 blo = 386, bhi = 1101
2025-07-01 05:49:41.133
2025-07-01 05:49:41.138 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:41.146 g = []
2025-07-01 05:49:41.154 if alo < ahi:
2025-07-01 05:49:41.164 if blo < bhi:
2025-07-01 05:49:41.174 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:41.181 else:
2025-07-01 05:49:41.187 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:41.193 elif blo < bhi:
2025-07-01 05:49:41.198 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:41.203
2025-07-01 05:49:41.208 > yield from g
2025-07-01 05:49:41.213
2025-07-01 05:49:41.219 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:41.224 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:41.229
2025-07-01 05:49:41.235 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:41.242 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:41.249 alo = 386, ahi = 1101
2025-07-01 05:49:41.256 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:41.267 blo = 386, bhi = 1101
2025-07-01 05:49:41.277
2025-07-01 05:49:41.285 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:41.292 r"""
2025-07-01 05:49:41.298 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:41.306 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:41.316 synch point, and intraline difference marking is done on the
2025-07-01 05:49:41.325 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:41.333
2025-07-01 05:49:41.339 Example:
2025-07-01 05:49:41.347
2025-07-01 05:49:41.359 >>> d = Differ()
2025-07-01 05:49:41.366 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:41.372 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:41.378 >>> print(''.join(results), end="")
2025-07-01 05:49:41.390 - abcDefghiJkl
2025-07-01 05:49:41.407 + abcdefGhijkl
2025-07-01 05:49:41.428 """
2025-07-01 05:49:41.434
2025-07-01 05:49:41.441 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:41.448 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:41.453 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:41.459 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:41.465 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:41.471
2025-07-01 05:49:41.477 # search for the pair that matches best without being identical
2025-07-01 05:49:41.483 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:41.490 # on junk -- unless we have to)
2025-07-01 05:49:41.497 for j in range(blo, bhi):
2025-07-01 05:49:41.502 bj = b[j]
2025-07-01 05:49:41.508 cruncher.set_seq2(bj)
2025-07-01 05:49:41.514 for i in range(alo, ahi):
2025-07-01 05:49:41.520 ai = a[i]
2025-07-01 05:49:41.528 if ai == bj:
2025-07-01 05:49:41.535 if eqi is None:
2025-07-01 05:49:41.544 eqi, eqj = i, j
2025-07-01 05:49:41.556 continue
2025-07-01 05:49:41.565 cruncher.set_seq1(ai)
2025-07-01 05:49:41.572 # computing similarity is expensive, so use the quick
2025-07-01 05:49:41.578 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:41.585 # compares by a factor of 3.
2025-07-01 05:49:41.591 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:41.597 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:41.603 # of the computation is cached by cruncher
2025-07-01 05:49:41.610 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:41.616 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:41.622 cruncher.ratio() > best_ratio:
2025-07-01 05:49:41.628 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:41.634 if best_ratio < cutoff:
2025-07-01 05:49:41.640 # no non-identical "pretty close" pair
2025-07-01 05:49:41.647 if eqi is None:
2025-07-01 05:49:41.654 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:41.662 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:41.670 return
2025-07-01 05:49:41.677 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:41.684 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:41.690 else:
2025-07-01 05:49:41.698 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:41.708 eqi = None
2025-07-01 05:49:41.719
2025-07-01 05:49:41.732 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:41.744 # identical
2025-07-01 05:49:41.751
2025-07-01 05:49:41.759 # pump out diffs from before the synch point
2025-07-01 05:49:41.767 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:41.778
2025-07-01 05:49:41.786 # do intraline marking on the synch pair
2025-07-01 05:49:41.793 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:41.799 if eqi is None:
2025-07-01 05:49:41.805 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:41.809 atags = btags = ""
2025-07-01 05:49:41.814 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:41.820 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:41.826 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:41.832 if tag == 'replace':
2025-07-01 05:49:41.838 atags += '^' * la
2025-07-01 05:49:41.844 btags += '^' * lb
2025-07-01 05:49:41.851 elif tag == 'delete':
2025-07-01 05:49:41.858 atags += '-' * la
2025-07-01 05:49:41.867 elif tag == 'insert':
2025-07-01 05:49:41.880 btags += '+' * lb
2025-07-01 05:49:41.892 elif tag == 'equal':
2025-07-01 05:49:41.901 atags += ' ' * la
2025-07-01 05:49:41.913 btags += ' ' * lb
2025-07-01 05:49:41.925 else:
2025-07-01 05:49:41.936 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:41.945 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:41.952 else:
2025-07-01 05:49:41.958 # the synch pair is identical
2025-07-01 05:49:41.963 yield ' ' + aelt
2025-07-01 05:49:41.969
2025-07-01 05:49:41.975 # pump out diffs from after the synch point
2025-07-01 05:49:41.982 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:41.989
2025-07-01 05:49:41.995 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:42.003 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:42.014
2025-07-01 05:49:42.027 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:42.035 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:42.042 alo = 387, ahi = 1101
2025-07-01 05:49:42.050 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:42.055 blo = 387, bhi = 1101
2025-07-01 05:49:42.060
2025-07-01 05:49:42.066 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:42.071 g = []
2025-07-01 05:49:42.080 if alo < ahi:
2025-07-01 05:49:42.091 if blo < bhi:
2025-07-01 05:49:42.098 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:42.106 else:
2025-07-01 05:49:42.114 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:42.122 elif blo < bhi:
2025-07-01 05:49:42.131 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:42.145
2025-07-01 05:49:42.154 > yield from g
2025-07-01 05:49:42.161
2025-07-01 05:49:42.168 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:42.175 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:42.180
2025-07-01 05:49:42.193 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:42.207 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:42.217 alo = 387, ahi = 1101
2025-07-01 05:49:42.225 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:42.231 blo = 387, bhi = 1101
2025-07-01 05:49:42.236
2025-07-01 05:49:42.242 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:42.252 r"""
2025-07-01 05:49:42.262 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:42.269 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:42.275 synch point, and intraline difference marking is done on the
2025-07-01 05:49:42.283 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:42.291
2025-07-01 05:49:42.298 Example:
2025-07-01 05:49:42.305
2025-07-01 05:49:42.317 >>> d = Differ()
2025-07-01 05:49:42.326 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:42.340 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:42.351 >>> print(''.join(results), end="")
2025-07-01 05:49:42.358 - abcDefghiJkl
2025-07-01 05:49:42.371 + abcdefGhijkl
2025-07-01 05:49:42.390 """
2025-07-01 05:49:42.399
2025-07-01 05:49:42.407 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:42.418 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:42.426 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:42.434 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:42.443 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:42.449
2025-07-01 05:49:42.460 # search for the pair that matches best without being identical
2025-07-01 05:49:42.470 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:42.480 # on junk -- unless we have to)
2025-07-01 05:49:42.491 for j in range(blo, bhi):
2025-07-01 05:49:42.500 bj = b[j]
2025-07-01 05:49:42.508 cruncher.set_seq2(bj)
2025-07-01 05:49:42.515 for i in range(alo, ahi):
2025-07-01 05:49:42.525 ai = a[i]
2025-07-01 05:49:42.531 if ai == bj:
2025-07-01 05:49:42.538 if eqi is None:
2025-07-01 05:49:42.543 eqi, eqj = i, j
2025-07-01 05:49:42.549 continue
2025-07-01 05:49:42.555 cruncher.set_seq1(ai)
2025-07-01 05:49:42.562 # computing similarity is expensive, so use the quick
2025-07-01 05:49:42.568 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:42.575 # compares by a factor of 3.
2025-07-01 05:49:42.582 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:42.589 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:42.595 # of the computation is cached by cruncher
2025-07-01 05:49:42.601 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:42.606 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:42.612 cruncher.ratio() > best_ratio:
2025-07-01 05:49:42.619 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:42.625 if best_ratio < cutoff:
2025-07-01 05:49:42.631 # no non-identical "pretty close" pair
2025-07-01 05:49:42.637 if eqi is None:
2025-07-01 05:49:42.644 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:42.651 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:42.658 return
2025-07-01 05:49:42.665 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:42.672 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:42.678 else:
2025-07-01 05:49:42.687 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:42.696 eqi = None
2025-07-01 05:49:42.706
2025-07-01 05:49:42.714 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:42.720 # identical
2025-07-01 05:49:42.727
2025-07-01 05:49:42.741 # pump out diffs from before the synch point
2025-07-01 05:49:42.752 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:42.761
2025-07-01 05:49:42.768 # do intraline marking on the synch pair
2025-07-01 05:49:42.774 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:42.784 if eqi is None:
2025-07-01 05:49:42.795 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:42.805 atags = btags = ""
2025-07-01 05:49:42.812 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:42.820 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:42.826 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:42.831 if tag == 'replace':
2025-07-01 05:49:42.839 atags += '^' * la
2025-07-01 05:49:42.849 btags += '^' * lb
2025-07-01 05:49:42.856 elif tag == 'delete':
2025-07-01 05:49:42.863 atags += '-' * la
2025-07-01 05:49:42.868 elif tag == 'insert':
2025-07-01 05:49:42.874 btags += '+' * lb
2025-07-01 05:49:42.882 elif tag == 'equal':
2025-07-01 05:49:42.889 atags += ' ' * la
2025-07-01 05:49:42.895 btags += ' ' * lb
2025-07-01 05:49:42.902 else:
2025-07-01 05:49:42.910 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:42.919 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:42.930 else:
2025-07-01 05:49:42.939 # the synch pair is identical
2025-07-01 05:49:42.947 yield ' ' + aelt
2025-07-01 05:49:42.957
2025-07-01 05:49:42.969 # pump out diffs from after the synch point
2025-07-01 05:49:42.980 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:42.988
2025-07-01 05:49:42.996 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:43.004 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:43.016
2025-07-01 05:49:43.027 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:43.037 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:43.044 alo = 388, ahi = 1101
2025-07-01 05:49:43.052 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:43.061 blo = 388, bhi = 1101
2025-07-01 05:49:43.072
2025-07-01 05:49:43.080 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:43.086 g = []
2025-07-01 05:49:43.093 if alo < ahi:
2025-07-01 05:49:43.100 if blo < bhi:
2025-07-01 05:49:43.106 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:43.112 else:
2025-07-01 05:49:43.118 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:43.123 elif blo < bhi:
2025-07-01 05:49:43.129 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:43.142
2025-07-01 05:49:43.152 > yield from g
2025-07-01 05:49:43.161
2025-07-01 05:49:43.169 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:43.175 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:43.182
2025-07-01 05:49:43.188 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:43.199 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:43.205 alo = 388, ahi = 1101
2025-07-01 05:49:43.213 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:43.219 blo = 388, bhi = 1101
2025-07-01 05:49:43.225
2025-07-01 05:49:43.231 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:43.237 r"""
2025-07-01 05:49:43.243 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:43.251 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:43.260 synch point, and intraline difference marking is done on the
2025-07-01 05:49:43.269 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:43.276
2025-07-01 05:49:43.282 Example:
2025-07-01 05:49:43.288
2025-07-01 05:49:43.294 >>> d = Differ()
2025-07-01 05:49:43.299 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:43.307 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:43.318 >>> print(''.join(results), end="")
2025-07-01 05:49:43.326 - abcDefghiJkl
2025-07-01 05:49:43.339 + abcdefGhijkl
2025-07-01 05:49:43.363 """
2025-07-01 05:49:43.375
2025-07-01 05:49:43.384 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:43.392 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:43.401 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:43.414 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:43.424 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:43.433
2025-07-01 05:49:43.445 # search for the pair that matches best without being identical
2025-07-01 05:49:43.456 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:43.464 # on junk -- unless we have to)
2025-07-01 05:49:43.471 for j in range(blo, bhi):
2025-07-01 05:49:43.477 bj = b[j]
2025-07-01 05:49:43.483 cruncher.set_seq2(bj)
2025-07-01 05:49:43.489 for i in range(alo, ahi):
2025-07-01 05:49:43.496 ai = a[i]
2025-07-01 05:49:43.503 if ai == bj:
2025-07-01 05:49:43.514 if eqi is None:
2025-07-01 05:49:43.523 eqi, eqj = i, j
2025-07-01 05:49:43.531 continue
2025-07-01 05:49:43.538 cruncher.set_seq1(ai)
2025-07-01 05:49:43.549 # computing similarity is expensive, so use the quick
2025-07-01 05:49:43.559 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:43.565 # compares by a factor of 3.
2025-07-01 05:49:43.570 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:43.575 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:43.579 # of the computation is cached by cruncher
2025-07-01 05:49:43.584 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:43.588 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:43.592 cruncher.ratio() > best_ratio:
2025-07-01 05:49:43.597 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:43.601 if best_ratio < cutoff:
2025-07-01 05:49:43.605 # no non-identical "pretty close" pair
2025-07-01 05:49:43.610 if eqi is None:
2025-07-01 05:49:43.614 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:43.619 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:43.623 return
2025-07-01 05:49:43.627 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:43.632 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:43.636 else:
2025-07-01 05:49:43.640 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:43.645 eqi = None
2025-07-01 05:49:43.649
2025-07-01 05:49:43.653 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:43.658 # identical
2025-07-01 05:49:43.663
2025-07-01 05:49:43.673 # pump out diffs from before the synch point
2025-07-01 05:49:43.679 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:43.685
2025-07-01 05:49:43.692 # do intraline marking on the synch pair
2025-07-01 05:49:43.698 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:43.706 if eqi is None:
2025-07-01 05:49:43.717 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:43.727 atags = btags = ""
2025-07-01 05:49:43.735 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:43.742 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:43.749 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:43.755 if tag == 'replace':
2025-07-01 05:49:43.760 atags += '^' * la
2025-07-01 05:49:43.765 btags += '^' * lb
2025-07-01 05:49:43.770 elif tag == 'delete':
2025-07-01 05:49:43.774 atags += '-' * la
2025-07-01 05:49:43.779 elif tag == 'insert':
2025-07-01 05:49:43.787 btags += '+' * lb
2025-07-01 05:49:43.793 elif tag == 'equal':
2025-07-01 05:49:43.799 atags += ' ' * la
2025-07-01 05:49:43.804 btags += ' ' * lb
2025-07-01 05:49:43.808 else:
2025-07-01 05:49:43.813 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:43.818 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:43.822 else:
2025-07-01 05:49:43.827 # the synch pair is identical
2025-07-01 05:49:43.832 yield ' ' + aelt
2025-07-01 05:49:43.837
2025-07-01 05:49:43.844 # pump out diffs from after the synch point
2025-07-01 05:49:43.851 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:43.858
2025-07-01 05:49:43.867 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:43.874 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:43.879
2025-07-01 05:49:43.883 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:43.888 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:43.893 alo = 389, ahi = 1101
2025-07-01 05:49:43.898 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:43.904 blo = 389, bhi = 1101
2025-07-01 05:49:43.910
2025-07-01 05:49:43.920 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:43.931 g = []
2025-07-01 05:49:43.943 if alo < ahi:
2025-07-01 05:49:43.952 if blo < bhi:
2025-07-01 05:49:43.958 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:43.964 else:
2025-07-01 05:49:43.969 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:43.975 elif blo < bhi:
2025-07-01 05:49:43.980 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:43.986
2025-07-01 05:49:43.996 > yield from g
2025-07-01 05:49:44.006
2025-07-01 05:49:44.013 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:44.020 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:44.025
2025-07-01 05:49:44.036 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:44.044 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:44.051 alo = 389, ahi = 1101
2025-07-01 05:49:44.057 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:44.063 blo = 389, bhi = 1101
2025-07-01 05:49:44.068
2025-07-01 05:49:44.074 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:44.083 r"""
2025-07-01 05:49:44.090 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:44.097 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:44.107 synch point, and intraline difference marking is done on the
2025-07-01 05:49:44.114 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:44.120
2025-07-01 05:49:44.126 Example:
2025-07-01 05:49:44.133
2025-07-01 05:49:44.140 >>> d = Differ()
2025-07-01 05:49:44.148 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:44.155 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:44.162 >>> print(''.join(results), end="")
2025-07-01 05:49:44.169 - abcDefghiJkl
2025-07-01 05:49:44.190 + abcdefGhijkl
2025-07-01 05:49:44.211 """
2025-07-01 05:49:44.220
2025-07-01 05:49:44.228 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:44.235 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:44.241 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:44.253 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:44.261 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:44.268
2025-07-01 05:49:44.276 # search for the pair that matches best without being identical
2025-07-01 05:49:44.282 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:44.289 # on junk -- unless we have to)
2025-07-01 05:49:44.295 for j in range(blo, bhi):
2025-07-01 05:49:44.301 bj = b[j]
2025-07-01 05:49:44.307 cruncher.set_seq2(bj)
2025-07-01 05:49:44.313 for i in range(alo, ahi):
2025-07-01 05:49:44.319 ai = a[i]
2025-07-01 05:49:44.324 if ai == bj:
2025-07-01 05:49:44.329 if eqi is None:
2025-07-01 05:49:44.333 eqi, eqj = i, j
2025-07-01 05:49:44.338 continue
2025-07-01 05:49:44.342 cruncher.set_seq1(ai)
2025-07-01 05:49:44.347 # computing similarity is expensive, so use the quick
2025-07-01 05:49:44.351 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:44.358 # compares by a factor of 3.
2025-07-01 05:49:44.366 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:44.374 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:44.383 # of the computation is cached by cruncher
2025-07-01 05:49:44.394 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:44.403 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:44.411 cruncher.ratio() > best_ratio:
2025-07-01 05:49:44.417 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:44.424 if best_ratio < cutoff:
2025-07-01 05:49:44.430 # no non-identical "pretty close" pair
2025-07-01 05:49:44.436 if eqi is None:
2025-07-01 05:49:44.442 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:44.448 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:44.454 return
2025-07-01 05:49:44.464 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:44.475 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:44.486 else:
2025-07-01 05:49:44.496 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:44.503 eqi = None
2025-07-01 05:49:44.510
2025-07-01 05:49:44.517 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:44.522 # identical
2025-07-01 05:49:44.528
2025-07-01 05:49:44.535 # pump out diffs from before the synch point
2025-07-01 05:49:44.545 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:44.552
2025-07-01 05:49:44.558 # do intraline marking on the synch pair
2025-07-01 05:49:44.565 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:44.573 if eqi is None:
2025-07-01 05:49:44.582 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:44.590 atags = btags = ""
2025-07-01 05:49:44.599 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:44.605 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:44.612 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:44.618 if tag == 'replace':
2025-07-01 05:49:44.625 atags += '^' * la
2025-07-01 05:49:44.632 btags += '^' * lb
2025-07-01 05:49:44.638 elif tag == 'delete':
2025-07-01 05:49:44.644 atags += '-' * la
2025-07-01 05:49:44.650 elif tag == 'insert':
2025-07-01 05:49:44.655 btags += '+' * lb
2025-07-01 05:49:44.662 elif tag == 'equal':
2025-07-01 05:49:44.668 atags += ' ' * la
2025-07-01 05:49:44.674 btags += ' ' * lb
2025-07-01 05:49:44.679 else:
2025-07-01 05:49:44.685 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:44.691 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:44.697 else:
2025-07-01 05:49:44.702 # the synch pair is identical
2025-07-01 05:49:44.714 yield ' ' + aelt
2025-07-01 05:49:44.725
2025-07-01 05:49:44.737 # pump out diffs from after the synch point
2025-07-01 05:49:44.747 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:44.753
2025-07-01 05:49:44.760 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:44.766 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:44.774
2025-07-01 05:49:44.785 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:44.793 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:44.800 alo = 390, ahi = 1101
2025-07-01 05:49:44.806 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:44.814 blo = 390, bhi = 1101
2025-07-01 05:49:44.819
2025-07-01 05:49:44.826 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:44.832 g = []
2025-07-01 05:49:44.839 if alo < ahi:
2025-07-01 05:49:44.849 if blo < bhi:
2025-07-01 05:49:44.857 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:44.865 else:
2025-07-01 05:49:44.871 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:44.877 elif blo < bhi:
2025-07-01 05:49:44.882 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:44.893
2025-07-01 05:49:44.902 > yield from g
2025-07-01 05:49:44.910
2025-07-01 05:49:44.917 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:44.928 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:44.937
2025-07-01 05:49:44.950 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:44.960 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:44.969 alo = 390, ahi = 1101
2025-07-01 05:49:44.978 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:44.987 blo = 390, bhi = 1101
2025-07-01 05:49:44.999
2025-07-01 05:49:45.009 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:45.017 r"""
2025-07-01 05:49:45.024 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:45.031 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:45.037 synch point, and intraline difference marking is done on the
2025-07-01 05:49:45.046 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:45.058
2025-07-01 05:49:45.067 Example:
2025-07-01 05:49:45.075
2025-07-01 05:49:45.089 >>> d = Differ()
2025-07-01 05:49:45.099 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:45.108 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:45.114 >>> print(''.join(results), end="")
2025-07-01 05:49:45.121 - abcDefghiJkl
2025-07-01 05:49:45.132 + abcdefGhijkl
2025-07-01 05:49:45.151 """
2025-07-01 05:49:45.159
2025-07-01 05:49:45.165 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:45.170 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:45.176 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:45.182 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:45.187 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:45.194
2025-07-01 05:49:45.203 # search for the pair that matches best without being identical
2025-07-01 05:49:45.213 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:45.225 # on junk -- unless we have to)
2025-07-01 05:49:45.235 for j in range(blo, bhi):
2025-07-01 05:49:45.243 bj = b[j]
2025-07-01 05:49:45.252 cruncher.set_seq2(bj)
2025-07-01 05:49:45.264 for i in range(alo, ahi):
2025-07-01 05:49:45.273 ai = a[i]
2025-07-01 05:49:45.284 if ai == bj:
2025-07-01 05:49:45.294 if eqi is None:
2025-07-01 05:49:45.303 eqi, eqj = i, j
2025-07-01 05:49:45.310 continue
2025-07-01 05:49:45.319 cruncher.set_seq1(ai)
2025-07-01 05:49:45.327 # computing similarity is expensive, so use the quick
2025-07-01 05:49:45.334 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:45.340 # compares by a factor of 3.
2025-07-01 05:49:45.347 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:45.355 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:45.365 # of the computation is cached by cruncher
2025-07-01 05:49:45.372 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:45.379 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:45.386 cruncher.ratio() > best_ratio:
2025-07-01 05:49:45.396 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:45.404 if best_ratio < cutoff:
2025-07-01 05:49:45.410 # no non-identical "pretty close" pair
2025-07-01 05:49:45.416 if eqi is None:
2025-07-01 05:49:45.421 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:45.427 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:45.432 return
2025-07-01 05:49:45.438 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:45.448 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:45.459 else:
2025-07-01 05:49:45.470 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:45.479 eqi = None
2025-07-01 05:49:45.487
2025-07-01 05:49:45.495 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:45.504 # identical
2025-07-01 05:49:45.513
2025-07-01 05:49:45.520 # pump out diffs from before the synch point
2025-07-01 05:49:45.527 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:45.536
2025-07-01 05:49:45.544 # do intraline marking on the synch pair
2025-07-01 05:49:45.551 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:45.559 if eqi is None:
2025-07-01 05:49:45.568 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:45.579 atags = btags = ""
2025-07-01 05:49:45.589 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:45.599 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:45.609 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:45.618 if tag == 'replace':
2025-07-01 05:49:45.627 atags += '^' * la
2025-07-01 05:49:45.638 btags += '^' * lb
2025-07-01 05:49:45.646 elif tag == 'delete':
2025-07-01 05:49:45.656 atags += '-' * la
2025-07-01 05:49:45.667 elif tag == 'insert':
2025-07-01 05:49:45.676 btags += '+' * lb
2025-07-01 05:49:45.687 elif tag == 'equal':
2025-07-01 05:49:45.698 atags += ' ' * la
2025-07-01 05:49:45.707 btags += ' ' * lb
2025-07-01 05:49:45.715 else:
2025-07-01 05:49:45.722 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:45.733 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:45.743 else:
2025-07-01 05:49:45.750 # the synch pair is identical
2025-07-01 05:49:45.756 yield ' ' + aelt
2025-07-01 05:49:45.763
2025-07-01 05:49:45.769 # pump out diffs from after the synch point
2025-07-01 05:49:45.775 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:45.780
2025-07-01 05:49:45.789 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:45.801 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:45.812
2025-07-01 05:49:45.823 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:45.832 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:45.844 alo = 391, ahi = 1101
2025-07-01 05:49:45.854 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:45.863 blo = 391, bhi = 1101
2025-07-01 05:49:45.870
2025-07-01 05:49:45.882 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:45.892 g = []
2025-07-01 05:49:45.901 if alo < ahi:
2025-07-01 05:49:45.908 if blo < bhi:
2025-07-01 05:49:45.914 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:45.921 else:
2025-07-01 05:49:45.928 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:45.934 elif blo < bhi:
2025-07-01 05:49:45.945 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:45.956
2025-07-01 05:49:45.964 > yield from g
2025-07-01 05:49:45.972
2025-07-01 05:49:45.982 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:45.993 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:46.000
2025-07-01 05:49:46.006 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:46.012 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:46.018 alo = 391, ahi = 1101
2025-07-01 05:49:46.026 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:46.032 blo = 391, bhi = 1101
2025-07-01 05:49:46.037
2025-07-01 05:49:46.043 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:46.050 r"""
2025-07-01 05:49:46.056 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:46.062 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:46.073 synch point, and intraline difference marking is done on the
2025-07-01 05:49:46.083 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:46.090
2025-07-01 05:49:46.095 Example:
2025-07-01 05:49:46.100
2025-07-01 05:49:46.104 >>> d = Differ()
2025-07-01 05:49:46.109 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:46.113 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:46.126 >>> print(''.join(results), end="")
2025-07-01 05:49:46.135 - abcDefghiJkl
2025-07-01 05:49:46.147 + abcdefGhijkl
2025-07-01 05:49:46.160 """
2025-07-01 05:49:46.166
2025-07-01 05:49:46.177 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:46.188 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:46.195 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:46.201 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:46.208 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:46.215
2025-07-01 05:49:46.223 # search for the pair that matches best without being identical
2025-07-01 05:49:46.229 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:46.235 # on junk -- unless we have to)
2025-07-01 05:49:46.240 for j in range(blo, bhi):
2025-07-01 05:49:46.245 bj = b[j]
2025-07-01 05:49:46.249 cruncher.set_seq2(bj)
2025-07-01 05:49:46.255 for i in range(alo, ahi):
2025-07-01 05:49:46.260 ai = a[i]
2025-07-01 05:49:46.266 if ai == bj:
2025-07-01 05:49:46.272 if eqi is None:
2025-07-01 05:49:46.278 eqi, eqj = i, j
2025-07-01 05:49:46.287 continue
2025-07-01 05:49:46.299 cruncher.set_seq1(ai)
2025-07-01 05:49:46.309 # computing similarity is expensive, so use the quick
2025-07-01 05:49:46.316 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:46.326 # compares by a factor of 3.
2025-07-01 05:49:46.338 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:46.349 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:46.357 # of the computation is cached by cruncher
2025-07-01 05:49:46.365 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:46.373 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:46.386 cruncher.ratio() > best_ratio:
2025-07-01 05:49:46.398 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:46.405 if best_ratio < cutoff:
2025-07-01 05:49:46.412 # no non-identical "pretty close" pair
2025-07-01 05:49:46.418 if eqi is None:
2025-07-01 05:49:46.428 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:46.440 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:46.451 return
2025-07-01 05:49:46.461 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:46.469 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:46.475 else:
2025-07-01 05:49:46.481 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:46.487 eqi = None
2025-07-01 05:49:46.493
2025-07-01 05:49:46.499 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:46.507 # identical
2025-07-01 05:49:46.518
2025-07-01 05:49:46.526 # pump out diffs from before the synch point
2025-07-01 05:49:46.533 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:46.540
2025-07-01 05:49:46.546 # do intraline marking on the synch pair
2025-07-01 05:49:46.551 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:46.557 if eqi is None:
2025-07-01 05:49:46.563 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:46.571 atags = btags = ""
2025-07-01 05:49:46.577 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:46.589 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:46.599 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:46.605 if tag == 'replace':
2025-07-01 05:49:46.611 atags += '^' * la
2025-07-01 05:49:46.619 btags += '^' * lb
2025-07-01 05:49:46.629 elif tag == 'delete':
2025-07-01 05:49:46.639 atags += '-' * la
2025-07-01 05:49:46.648 elif tag == 'insert':
2025-07-01 05:49:46.655 btags += '+' * lb
2025-07-01 05:49:46.661 elif tag == 'equal':
2025-07-01 05:49:46.667 atags += ' ' * la
2025-07-01 05:49:46.672 btags += ' ' * lb
2025-07-01 05:49:46.678 else:
2025-07-01 05:49:46.685 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:46.691 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:46.700 else:
2025-07-01 05:49:46.711 # the synch pair is identical
2025-07-01 05:49:46.721 yield ' ' + aelt
2025-07-01 05:49:46.729
2025-07-01 05:49:46.735 # pump out diffs from after the synch point
2025-07-01 05:49:46.748 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:46.756
2025-07-01 05:49:46.762 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:46.768 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:46.776
2025-07-01 05:49:46.787 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:46.796 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:46.803 alo = 392, ahi = 1101
2025-07-01 05:49:46.811 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:46.818 blo = 392, bhi = 1101
2025-07-01 05:49:46.826
2025-07-01 05:49:46.833 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:46.840 g = []
2025-07-01 05:49:46.848 if alo < ahi:
2025-07-01 05:49:46.855 if blo < bhi:
2025-07-01 05:49:46.862 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:46.871 else:
2025-07-01 05:49:46.883 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:46.893 elif blo < bhi:
2025-07-01 05:49:46.901 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:46.907
2025-07-01 05:49:46.914 > yield from g
2025-07-01 05:49:46.926
2025-07-01 05:49:46.935 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:46.944 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:46.951
2025-07-01 05:49:46.956 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:46.963 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:46.968 alo = 392, ahi = 1101
2025-07-01 05:49:46.975 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:46.983 blo = 392, bhi = 1101
2025-07-01 05:49:46.993
2025-07-01 05:49:47.001 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:47.008 r"""
2025-07-01 05:49:47.015 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:47.024 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:47.030 synch point, and intraline difference marking is done on the
2025-07-01 05:49:47.036 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:47.043
2025-07-01 05:49:47.049 Example:
2025-07-01 05:49:47.056
2025-07-01 05:49:47.063 >>> d = Differ()
2025-07-01 05:49:47.070 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:47.079 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:47.089 >>> print(''.join(results), end="")
2025-07-01 05:49:47.097 - abcDefghiJkl
2025-07-01 05:49:47.108 + abcdefGhijkl
2025-07-01 05:49:47.118 """
2025-07-01 05:49:47.123
2025-07-01 05:49:47.127 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:47.132 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:47.137 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:47.141 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:47.147 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:47.152
2025-07-01 05:49:47.165 # search for the pair that matches best without being identical
2025-07-01 05:49:47.173 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:47.180 # on junk -- unless we have to)
2025-07-01 05:49:47.186 for j in range(blo, bhi):
2025-07-01 05:49:47.193 bj = b[j]
2025-07-01 05:49:47.201 cruncher.set_seq2(bj)
2025-07-01 05:49:47.207 for i in range(alo, ahi):
2025-07-01 05:49:47.219 ai = a[i]
2025-07-01 05:49:47.226 if ai == bj:
2025-07-01 05:49:47.235 if eqi is None:
2025-07-01 05:49:47.242 eqi, eqj = i, j
2025-07-01 05:49:47.248 continue
2025-07-01 05:49:47.255 cruncher.set_seq1(ai)
2025-07-01 05:49:47.260 # computing similarity is expensive, so use the quick
2025-07-01 05:49:47.264 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:47.269 # compares by a factor of 3.
2025-07-01 05:49:47.275 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:47.281 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:47.287 # of the computation is cached by cruncher
2025-07-01 05:49:47.298 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:47.309 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:47.316 cruncher.ratio() > best_ratio:
2025-07-01 05:49:47.322 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:47.327 if best_ratio < cutoff:
2025-07-01 05:49:47.332 # no non-identical "pretty close" pair
2025-07-01 05:49:47.337 if eqi is None:
2025-07-01 05:49:47.342 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:47.346 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:47.351 return
2025-07-01 05:49:47.357 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:47.366 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:47.373 else:
2025-07-01 05:49:47.379 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:47.386 eqi = None
2025-07-01 05:49:47.398
2025-07-01 05:49:47.410 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:47.418 # identical
2025-07-01 05:49:47.425
2025-07-01 05:49:47.435 # pump out diffs from before the synch point
2025-07-01 05:49:47.445 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:47.454
2025-07-01 05:49:47.461 # do intraline marking on the synch pair
2025-07-01 05:49:47.468 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:47.474 if eqi is None:
2025-07-01 05:49:47.480 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:47.486 atags = btags = ""
2025-07-01 05:49:47.492 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:47.498 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:47.504 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:47.515 if tag == 'replace':
2025-07-01 05:49:47.527 atags += '^' * la
2025-07-01 05:49:47.539 btags += '^' * lb
2025-07-01 05:49:47.547 elif tag == 'delete':
2025-07-01 05:49:47.555 atags += '-' * la
2025-07-01 05:49:47.568 elif tag == 'insert':
2025-07-01 05:49:47.577 btags += '+' * lb
2025-07-01 05:49:47.586 elif tag == 'equal':
2025-07-01 05:49:47.593 atags += ' ' * la
2025-07-01 05:49:47.600 btags += ' ' * lb
2025-07-01 05:49:47.607 else:
2025-07-01 05:49:47.614 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:47.620 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:47.628 else:
2025-07-01 05:49:47.633 # the synch pair is identical
2025-07-01 05:49:47.639 yield ' ' + aelt
2025-07-01 05:49:47.646
2025-07-01 05:49:47.652 # pump out diffs from after the synch point
2025-07-01 05:49:47.659 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:47.667
2025-07-01 05:49:47.679 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:47.687 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:47.694
2025-07-01 05:49:47.701 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:47.708 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:47.714 alo = 393, ahi = 1101
2025-07-01 05:49:47.724 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:47.735 blo = 393, bhi = 1101
2025-07-01 05:49:47.745
2025-07-01 05:49:47.755 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:47.768 g = []
2025-07-01 05:49:47.777 if alo < ahi:
2025-07-01 05:49:47.785 if blo < bhi:
2025-07-01 05:49:47.792 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:47.799 else:
2025-07-01 05:49:47.812 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:47.821 elif blo < bhi:
2025-07-01 05:49:47.828 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:47.835
2025-07-01 05:49:47.843 > yield from g
2025-07-01 05:49:47.853
2025-07-01 05:49:47.862 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:47.870 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:47.879
2025-07-01 05:49:47.886 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:47.894 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:47.901 alo = 393, ahi = 1101
2025-07-01 05:49:47.908 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:47.914 blo = 393, bhi = 1101
2025-07-01 05:49:47.919
2025-07-01 05:49:47.931 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:47.941 r"""
2025-07-01 05:49:47.949 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:47.957 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:47.963 synch point, and intraline difference marking is done on the
2025-07-01 05:49:47.973 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:47.984
2025-07-01 05:49:47.996 Example:
2025-07-01 05:49:48.007
2025-07-01 05:49:48.016 >>> d = Differ()
2025-07-01 05:49:48.024 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:48.031 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:48.039 >>> print(''.join(results), end="")
2025-07-01 05:49:48.046 - abcDefghiJkl
2025-07-01 05:49:48.067 + abcdefGhijkl
2025-07-01 05:49:48.087 """
2025-07-01 05:49:48.095
2025-07-01 05:49:48.102 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:48.113 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:48.122 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:48.129 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:48.136 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:48.141
2025-07-01 05:49:48.147 # search for the pair that matches best without being identical
2025-07-01 05:49:48.152 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:48.158 # on junk -- unless we have to)
2025-07-01 05:49:48.169 for j in range(blo, bhi):
2025-07-01 05:49:48.178 bj = b[j]
2025-07-01 05:49:48.186 cruncher.set_seq2(bj)
2025-07-01 05:49:48.195 for i in range(alo, ahi):
2025-07-01 05:49:48.206 ai = a[i]
2025-07-01 05:49:48.215 if ai == bj:
2025-07-01 05:49:48.226 if eqi is None:
2025-07-01 05:49:48.237 eqi, eqj = i, j
2025-07-01 05:49:48.249 continue
2025-07-01 05:49:48.260 cruncher.set_seq1(ai)
2025-07-01 05:49:48.273 # computing similarity is expensive, so use the quick
2025-07-01 05:49:48.283 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:48.291 # compares by a factor of 3.
2025-07-01 05:49:48.299 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:48.306 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:48.312 # of the computation is cached by cruncher
2025-07-01 05:49:48.318 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:48.324 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:48.330 cruncher.ratio() > best_ratio:
2025-07-01 05:49:48.335 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:48.343 if best_ratio < cutoff:
2025-07-01 05:49:48.353 # no non-identical "pretty close" pair
2025-07-01 05:49:48.362 if eqi is None:
2025-07-01 05:49:48.371 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:48.380 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:48.391 return
2025-07-01 05:49:48.400 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:48.407 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:48.413 else:
2025-07-01 05:49:48.419 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:48.427 eqi = None
2025-07-01 05:49:48.439
2025-07-01 05:49:48.448 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:48.456 # identical
2025-07-01 05:49:48.462
2025-07-01 05:49:48.472 # pump out diffs from before the synch point
2025-07-01 05:49:48.485 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:48.496
2025-07-01 05:49:48.504 # do intraline marking on the synch pair
2025-07-01 05:49:48.511 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:48.517 if eqi is None:
2025-07-01 05:49:48.523 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:48.528 atags = btags = ""
2025-07-01 05:49:48.537 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:48.549 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:48.560 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:48.571 if tag == 'replace':
2025-07-01 05:49:48.580 atags += '^' * la
2025-07-01 05:49:48.587 btags += '^' * lb
2025-07-01 05:49:48.594 elif tag == 'delete':
2025-07-01 05:49:48.606 atags += '-' * la
2025-07-01 05:49:48.617 elif tag == 'insert':
2025-07-01 05:49:48.629 btags += '+' * lb
2025-07-01 05:49:48.641 elif tag == 'equal':
2025-07-01 05:49:48.652 atags += ' ' * la
2025-07-01 05:49:48.663 btags += ' ' * lb
2025-07-01 05:49:48.672 else:
2025-07-01 05:49:48.680 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:48.690 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:48.703 else:
2025-07-01 05:49:48.713 # the synch pair is identical
2025-07-01 05:49:48.720 yield ' ' + aelt
2025-07-01 05:49:48.727
2025-07-01 05:49:48.733 # pump out diffs from after the synch point
2025-07-01 05:49:48.739 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:48.744
2025-07-01 05:49:48.750 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:48.760 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:48.769
2025-07-01 05:49:48.775 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:48.784 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:48.795 alo = 394, ahi = 1101
2025-07-01 05:49:48.805 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:48.814 blo = 394, bhi = 1101
2025-07-01 05:49:48.822
2025-07-01 05:49:48.830 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:48.836 g = []
2025-07-01 05:49:48.843 if alo < ahi:
2025-07-01 05:49:48.849 if blo < bhi:
2025-07-01 05:49:48.855 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:48.861 else:
2025-07-01 05:49:48.873 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:48.883 elif blo < bhi:
2025-07-01 05:49:48.891 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:48.903
2025-07-01 05:49:48.914 > yield from g
2025-07-01 05:49:48.923
2025-07-01 05:49:48.932 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:48.938 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:48.944
2025-07-01 05:49:48.950 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:48.958 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:48.967 alo = 394, ahi = 1101
2025-07-01 05:49:48.979 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:48.989 blo = 394, bhi = 1101
2025-07-01 05:49:48.998
2025-07-01 05:49:49.011 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:49.022 r"""
2025-07-01 05:49:49.033 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:49.046 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:49.057 synch point, and intraline difference marking is done on the
2025-07-01 05:49:49.069 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:49.078
2025-07-01 05:49:49.085 Example:
2025-07-01 05:49:49.091
2025-07-01 05:49:49.101 >>> d = Differ()
2025-07-01 05:49:49.111 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:49.120 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:49.127 >>> print(''.join(results), end="")
2025-07-01 05:49:49.134 - abcDefghiJkl
2025-07-01 05:49:49.152 + abcdefGhijkl
2025-07-01 05:49:49.174 """
2025-07-01 05:49:49.181
2025-07-01 05:49:49.188 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:49.194 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:49.199 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:49.205 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:49.211 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:49.217
2025-07-01 05:49:49.224 # search for the pair that matches best without being identical
2025-07-01 05:49:49.231 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:49.239 # on junk -- unless we have to)
2025-07-01 05:49:49.247 for j in range(blo, bhi):
2025-07-01 05:49:49.254 bj = b[j]
2025-07-01 05:49:49.265 cruncher.set_seq2(bj)
2025-07-01 05:49:49.275 for i in range(alo, ahi):
2025-07-01 05:49:49.282 ai = a[i]
2025-07-01 05:49:49.291 if ai == bj:
2025-07-01 05:49:49.297 if eqi is None:
2025-07-01 05:49:49.308 eqi, eqj = i, j
2025-07-01 05:49:49.318 continue
2025-07-01 05:49:49.325 cruncher.set_seq1(ai)
2025-07-01 05:49:49.336 # computing similarity is expensive, so use the quick
2025-07-01 05:49:49.344 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:49.351 # compares by a factor of 3.
2025-07-01 05:49:49.360 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:49.368 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:49.376 # of the computation is cached by cruncher
2025-07-01 05:49:49.383 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:49.389 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:49.396 cruncher.ratio() > best_ratio:
2025-07-01 05:49:49.405 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:49.414 if best_ratio < cutoff:
2025-07-01 05:49:49.421 # no non-identical "pretty close" pair
2025-07-01 05:49:49.427 if eqi is None:
2025-07-01 05:49:49.433 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:49.439 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:49.445 return
2025-07-01 05:49:49.451 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:49.457 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:49.463 else:
2025-07-01 05:49:49.470 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:49.481 eqi = None
2025-07-01 05:49:49.492
2025-07-01 05:49:49.502 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:49.516 # identical
2025-07-01 05:49:49.526
2025-07-01 05:49:49.534 # pump out diffs from before the synch point
2025-07-01 05:49:49.542 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:49.548
2025-07-01 05:49:49.554 # do intraline marking on the synch pair
2025-07-01 05:49:49.562 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:49.569 if eqi is None:
2025-07-01 05:49:49.576 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:49.581 atags = btags = ""
2025-07-01 05:49:49.586 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:49.591 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:49.596 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:49.602 if tag == 'replace':
2025-07-01 05:49:49.608 atags += '^' * la
2025-07-01 05:49:49.615 btags += '^' * lb
2025-07-01 05:49:49.620 elif tag == 'delete':
2025-07-01 05:49:49.629 atags += '-' * la
2025-07-01 05:49:49.636 elif tag == 'insert':
2025-07-01 05:49:49.643 btags += '+' * lb
2025-07-01 05:49:49.651 elif tag == 'equal':
2025-07-01 05:49:49.662 atags += ' ' * la
2025-07-01 05:49:49.671 btags += ' ' * lb
2025-07-01 05:49:49.678 else:
2025-07-01 05:49:49.685 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:49.692 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:49.698 else:
2025-07-01 05:49:49.711 # the synch pair is identical
2025-07-01 05:49:49.722 yield ' ' + aelt
2025-07-01 05:49:49.731
2025-07-01 05:49:49.743 # pump out diffs from after the synch point
2025-07-01 05:49:49.753 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:49.761
2025-07-01 05:49:49.768 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:49.775 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:49.781
2025-07-01 05:49:49.786 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:49.793 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:49.800 alo = 395, ahi = 1101
2025-07-01 05:49:49.809 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:49.821 blo = 395, bhi = 1101
2025-07-01 05:49:49.832
2025-07-01 05:49:49.840 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:49.847 g = []
2025-07-01 05:49:49.854 if alo < ahi:
2025-07-01 05:49:49.861 if blo < bhi:
2025-07-01 05:49:49.866 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:49.872 else:
2025-07-01 05:49:49.878 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:49.888 elif blo < bhi:
2025-07-01 05:49:49.898 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:49.905
2025-07-01 05:49:49.912 > yield from g
2025-07-01 05:49:49.919
2025-07-01 05:49:49.929 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:49.938 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:49.945
2025-07-01 05:49:49.951 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:49.959 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:49.964 alo = 395, ahi = 1101
2025-07-01 05:49:49.972 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:49.979 blo = 395, bhi = 1101
2025-07-01 05:49:49.988
2025-07-01 05:49:49.996 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:50.003 r"""
2025-07-01 05:49:50.011 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:50.018 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:50.026 synch point, and intraline difference marking is done on the
2025-07-01 05:49:50.034 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:50.044
2025-07-01 05:49:50.053 Example:
2025-07-01 05:49:50.060
2025-07-01 05:49:50.068 >>> d = Differ()
2025-07-01 05:49:50.075 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:50.084 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:50.093 >>> print(''.join(results), end="")
2025-07-01 05:49:50.103 - abcDefghiJkl
2025-07-01 05:49:50.125 + abcdefGhijkl
2025-07-01 05:49:50.139 """
2025-07-01 05:49:50.145
2025-07-01 05:49:50.150 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:50.155 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:50.159 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:50.164 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:50.168 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:50.173
2025-07-01 05:49:50.177 # search for the pair that matches best without being identical
2025-07-01 05:49:50.182 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:50.186 # on junk -- unless we have to)
2025-07-01 05:49:50.191 for j in range(blo, bhi):
2025-07-01 05:49:50.195 bj = b[j]
2025-07-01 05:49:50.199 cruncher.set_seq2(bj)
2025-07-01 05:49:50.204 for i in range(alo, ahi):
2025-07-01 05:49:50.208 ai = a[i]
2025-07-01 05:49:50.213 if ai == bj:
2025-07-01 05:49:50.219 if eqi is None:
2025-07-01 05:49:50.224 eqi, eqj = i, j
2025-07-01 05:49:50.231 continue
2025-07-01 05:49:50.240 cruncher.set_seq1(ai)
2025-07-01 05:49:50.251 # computing similarity is expensive, so use the quick
2025-07-01 05:49:50.261 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:50.273 # compares by a factor of 3.
2025-07-01 05:49:50.286 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:50.294 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:50.301 # of the computation is cached by cruncher
2025-07-01 05:49:50.312 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:50.321 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:50.331 cruncher.ratio() > best_ratio:
2025-07-01 05:49:50.343 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:50.352 if best_ratio < cutoff:
2025-07-01 05:49:50.365 # no non-identical "pretty close" pair
2025-07-01 05:49:50.375 if eqi is None:
2025-07-01 05:49:50.384 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:50.391 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:50.397 return
2025-07-01 05:49:50.403 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:50.411 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:50.423 else:
2025-07-01 05:49:50.431 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:50.443 eqi = None
2025-07-01 05:49:50.452
2025-07-01 05:49:50.459 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:50.466 # identical
2025-07-01 05:49:50.474
2025-07-01 05:49:50.485 # pump out diffs from before the synch point
2025-07-01 05:49:50.495 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:50.508
2025-07-01 05:49:50.520 # do intraline marking on the synch pair
2025-07-01 05:49:50.528 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:50.535 if eqi is None:
2025-07-01 05:49:50.543 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:50.554 atags = btags = ""
2025-07-01 05:49:50.562 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:50.569 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:50.576 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:50.585 if tag == 'replace':
2025-07-01 05:49:50.592 atags += '^' * la
2025-07-01 05:49:50.599 btags += '^' * lb
2025-07-01 05:49:50.605 elif tag == 'delete':
2025-07-01 05:49:50.618 atags += '-' * la
2025-07-01 05:49:50.629 elif tag == 'insert':
2025-07-01 05:49:50.638 btags += '+' * lb
2025-07-01 05:49:50.647 elif tag == 'equal':
2025-07-01 05:49:50.660 atags += ' ' * la
2025-07-01 05:49:50.671 btags += ' ' * lb
2025-07-01 05:49:50.679 else:
2025-07-01 05:49:50.690 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:50.699 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:50.707 else:
2025-07-01 05:49:50.717 # the synch pair is identical
2025-07-01 05:49:50.729 yield ' ' + aelt
2025-07-01 05:49:50.740
2025-07-01 05:49:50.751 # pump out diffs from after the synch point
2025-07-01 05:49:50.760 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:50.767
2025-07-01 05:49:50.774 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:50.783 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:50.794
2025-07-01 05:49:50.802 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:50.811 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:50.822 alo = 396, ahi = 1101
2025-07-01 05:49:50.837 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:50.845 blo = 396, bhi = 1101
2025-07-01 05:49:50.852
2025-07-01 05:49:50.859 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:50.869 g = []
2025-07-01 05:49:50.879 if alo < ahi:
2025-07-01 05:49:50.890 if blo < bhi:
2025-07-01 05:49:50.901 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:50.915 else:
2025-07-01 05:49:50.926 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:50.932 elif blo < bhi:
2025-07-01 05:49:50.939 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:50.945
2025-07-01 05:49:50.950 > yield from g
2025-07-01 05:49:50.956
2025-07-01 05:49:50.963 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:50.973 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:50.981
2025-07-01 05:49:50.989 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:50.999 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:51.009 alo = 396, ahi = 1101
2025-07-01 05:49:51.017 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:51.023 blo = 396, bhi = 1101
2025-07-01 05:49:51.029
2025-07-01 05:49:51.035 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:51.041 r"""
2025-07-01 05:49:51.048 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:51.054 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:51.059 synch point, and intraline difference marking is done on the
2025-07-01 05:49:51.067 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:51.079
2025-07-01 05:49:51.088 Example:
2025-07-01 05:49:51.098
2025-07-01 05:49:51.106 >>> d = Differ()
2025-07-01 05:49:51.113 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:51.121 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:51.129 >>> print(''.join(results), end="")
2025-07-01 05:49:51.135 - abcDefghiJkl
2025-07-01 05:49:51.146 + abcdefGhijkl
2025-07-01 05:49:51.156 """
2025-07-01 05:49:51.165
2025-07-01 05:49:51.175 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:51.184 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:51.193 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:51.201 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:51.208 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:51.214
2025-07-01 05:49:51.220 # search for the pair that matches best without being identical
2025-07-01 05:49:51.226 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:51.232 # on junk -- unless we have to)
2025-07-01 05:49:51.244 for j in range(blo, bhi):
2025-07-01 05:49:51.254 bj = b[j]
2025-07-01 05:49:51.260 cruncher.set_seq2(bj)
2025-07-01 05:49:51.267 for i in range(alo, ahi):
2025-07-01 05:49:51.280 ai = a[i]
2025-07-01 05:49:51.287 if ai == bj:
2025-07-01 05:49:51.295 if eqi is None:
2025-07-01 05:49:51.307 eqi, eqj = i, j
2025-07-01 05:49:51.314 continue
2025-07-01 05:49:51.320 cruncher.set_seq1(ai)
2025-07-01 05:49:51.326 # computing similarity is expensive, so use the quick
2025-07-01 05:49:51.332 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:51.340 # compares by a factor of 3.
2025-07-01 05:49:51.348 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:51.354 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:51.360 # of the computation is cached by cruncher
2025-07-01 05:49:51.366 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:51.372 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:51.379 cruncher.ratio() > best_ratio:
2025-07-01 05:49:51.386 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:51.392 if best_ratio < cutoff:
2025-07-01 05:49:51.398 # no non-identical "pretty close" pair
2025-07-01 05:49:51.403 if eqi is None:
2025-07-01 05:49:51.411 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:51.423 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:51.431 return
2025-07-01 05:49:51.437 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:51.443 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:51.450 else:
2025-07-01 05:49:51.456 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:51.462 eqi = None
2025-07-01 05:49:51.475
2025-07-01 05:49:51.487 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:51.495 # identical
2025-07-01 05:49:51.502
2025-07-01 05:49:51.507 # pump out diffs from before the synch point
2025-07-01 05:49:51.512 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:51.525
2025-07-01 05:49:51.533 # do intraline marking on the synch pair
2025-07-01 05:49:51.540 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:51.547 if eqi is None:
2025-07-01 05:49:51.555 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:51.564 atags = btags = ""
2025-07-01 05:49:51.572 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:51.580 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:51.590 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:51.603 if tag == 'replace':
2025-07-01 05:49:51.614 atags += '^' * la
2025-07-01 05:49:51.621 btags += '^' * lb
2025-07-01 05:49:51.627 elif tag == 'delete':
2025-07-01 05:49:51.634 atags += '-' * la
2025-07-01 05:49:51.640 elif tag == 'insert':
2025-07-01 05:49:51.647 btags += '+' * lb
2025-07-01 05:49:51.654 elif tag == 'equal':
2025-07-01 05:49:51.666 atags += ' ' * la
2025-07-01 05:49:51.674 btags += ' ' * lb
2025-07-01 05:49:51.681 else:
2025-07-01 05:49:51.687 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:51.692 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:51.696 else:
2025-07-01 05:49:51.701 # the synch pair is identical
2025-07-01 05:49:51.706 yield ' ' + aelt
2025-07-01 05:49:51.712
2025-07-01 05:49:51.718 # pump out diffs from after the synch point
2025-07-01 05:49:51.724 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:51.731
2025-07-01 05:49:51.740 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:51.749 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:51.756
2025-07-01 05:49:51.763 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:51.776 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:51.785 alo = 397, ahi = 1101
2025-07-01 05:49:51.797 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:51.806 blo = 397, bhi = 1101
2025-07-01 05:49:51.813
2025-07-01 05:49:51.825 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:51.834 g = []
2025-07-01 05:49:51.842 if alo < ahi:
2025-07-01 05:49:51.849 if blo < bhi:
2025-07-01 05:49:51.860 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:51.871 else:
2025-07-01 05:49:51.880 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:51.887 elif blo < bhi:
2025-07-01 05:49:51.895 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:51.906
2025-07-01 05:49:51.916 > yield from g
2025-07-01 05:49:51.924
2025-07-01 05:49:51.931 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:51.938 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:51.943
2025-07-01 05:49:51.951 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:51.961 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:51.970 alo = 397, ahi = 1101
2025-07-01 05:49:51.977 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:51.983 blo = 397, bhi = 1101
2025-07-01 05:49:51.987
2025-07-01 05:49:51.991 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:51.996 r"""
2025-07-01 05:49:52.000 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:52.005 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:52.009 synch point, and intraline difference marking is done on the
2025-07-01 05:49:52.013 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:52.018
2025-07-01 05:49:52.022 Example:
2025-07-01 05:49:52.026
2025-07-01 05:49:52.031 >>> d = Differ()
2025-07-01 05:49:52.035 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:52.039 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:52.044 >>> print(''.join(results), end="")
2025-07-01 05:49:52.048 - abcDefghiJkl
2025-07-01 05:49:52.057 + abcdefGhijkl
2025-07-01 05:49:52.065 """
2025-07-01 05:49:52.069
2025-07-01 05:49:52.074 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:52.078 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:52.082 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:52.087 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:52.091 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:52.095
2025-07-01 05:49:52.100 # search for the pair that matches best without being identical
2025-07-01 05:49:52.107 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:52.118 # on junk -- unless we have to)
2025-07-01 05:49:52.126 for j in range(blo, bhi):
2025-07-01 05:49:52.135 bj = b[j]
2025-07-01 05:49:52.145 cruncher.set_seq2(bj)
2025-07-01 05:49:52.152 for i in range(alo, ahi):
2025-07-01 05:49:52.157 ai = a[i]
2025-07-01 05:49:52.162 if ai == bj:
2025-07-01 05:49:52.166 if eqi is None:
2025-07-01 05:49:52.170 eqi, eqj = i, j
2025-07-01 05:49:52.175 continue
2025-07-01 05:49:52.182 cruncher.set_seq1(ai)
2025-07-01 05:49:52.189 # computing similarity is expensive, so use the quick
2025-07-01 05:49:52.195 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:52.200 # compares by a factor of 3.
2025-07-01 05:49:52.205 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:52.209 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:52.213 # of the computation is cached by cruncher
2025-07-01 05:49:52.218 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:52.222 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:52.227 cruncher.ratio() > best_ratio:
2025-07-01 05:49:52.231 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:52.235 if best_ratio < cutoff:
2025-07-01 05:49:52.240 # no non-identical "pretty close" pair
2025-07-01 05:49:52.244 if eqi is None:
2025-07-01 05:49:52.250 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:52.256 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:52.262 return
2025-07-01 05:49:52.275 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:52.285 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:52.293 else:
2025-07-01 05:49:52.300 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:52.306 eqi = None
2025-07-01 05:49:52.311
2025-07-01 05:49:52.317 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:52.323 # identical
2025-07-01 05:49:52.329
2025-07-01 05:49:52.335 # pump out diffs from before the synch point
2025-07-01 05:49:52.342 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:52.348
2025-07-01 05:49:52.354 # do intraline marking on the synch pair
2025-07-01 05:49:52.362 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:52.373 if eqi is None:
2025-07-01 05:49:52.382 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:52.390 atags = btags = ""
2025-07-01 05:49:52.401 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:52.410 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:52.418 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:52.428 if tag == 'replace':
2025-07-01 05:49:52.439 atags += '^' * la
2025-07-01 05:49:52.447 btags += '^' * lb
2025-07-01 05:49:52.454 elif tag == 'delete':
2025-07-01 05:49:52.461 atags += '-' * la
2025-07-01 05:49:52.467 elif tag == 'insert':
2025-07-01 05:49:52.472 btags += '+' * lb
2025-07-01 05:49:52.478 elif tag == 'equal':
2025-07-01 05:49:52.488 atags += ' ' * la
2025-07-01 05:49:52.500 btags += ' ' * lb
2025-07-01 05:49:52.511 else:
2025-07-01 05:49:52.519 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:52.526 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:52.531 else:
2025-07-01 05:49:52.538 # the synch pair is identical
2025-07-01 05:49:52.549 yield ' ' + aelt
2025-07-01 05:49:52.556
2025-07-01 05:49:52.564 # pump out diffs from after the synch point
2025-07-01 05:49:52.573 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:52.583
2025-07-01 05:49:52.593 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:52.604 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:52.614
2025-07-01 05:49:52.623 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:52.636 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:52.645 alo = 400, ahi = 1101
2025-07-01 05:49:52.658 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:52.669 blo = 400, bhi = 1101
2025-07-01 05:49:52.679
2025-07-01 05:49:52.688 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:52.697 g = []
2025-07-01 05:49:52.708 if alo < ahi:
2025-07-01 05:49:52.717 if blo < bhi:
2025-07-01 05:49:52.724 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:52.736 else:
2025-07-01 05:49:52.745 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:52.752 elif blo < bhi:
2025-07-01 05:49:52.759 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:52.769
2025-07-01 05:49:52.776 > yield from g
2025-07-01 05:49:52.782
2025-07-01 05:49:52.787 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:52.792 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:52.796
2025-07-01 05:49:52.800 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:52.805 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:52.810 alo = 400, ahi = 1101
2025-07-01 05:49:52.814 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:52.819 blo = 400, bhi = 1101
2025-07-01 05:49:52.823
2025-07-01 05:49:52.827 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:52.831 r"""
2025-07-01 05:49:52.836 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:52.840 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:52.845 synch point, and intraline difference marking is done on the
2025-07-01 05:49:52.849 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:52.853
2025-07-01 05:49:52.857 Example:
2025-07-01 05:49:52.862
2025-07-01 05:49:52.866 >>> d = Differ()
2025-07-01 05:49:52.870 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:52.875 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:52.879 >>> print(''.join(results), end="")
2025-07-01 05:49:52.884 - abcDefghiJkl
2025-07-01 05:49:52.892 + abcdefGhijkl
2025-07-01 05:49:52.901 """
2025-07-01 05:49:52.907
2025-07-01 05:49:52.913 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:52.920 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:52.926 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:52.933 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:52.940 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:52.947
2025-07-01 05:49:52.954 # search for the pair that matches best without being identical
2025-07-01 05:49:52.967 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:52.977 # on junk -- unless we have to)
2025-07-01 05:49:52.985 for j in range(blo, bhi):
2025-07-01 05:49:52.992 bj = b[j]
2025-07-01 05:49:52.998 cruncher.set_seq2(bj)
2025-07-01 05:49:53.004 for i in range(alo, ahi):
2025-07-01 05:49:53.008 ai = a[i]
2025-07-01 05:49:53.013 if ai == bj:
2025-07-01 05:49:53.018 if eqi is None:
2025-07-01 05:49:53.022 eqi, eqj = i, j
2025-07-01 05:49:53.027 continue
2025-07-01 05:49:53.032 cruncher.set_seq1(ai)
2025-07-01 05:49:53.038 # computing similarity is expensive, so use the quick
2025-07-01 05:49:53.047 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:53.057 # compares by a factor of 3.
2025-07-01 05:49:53.064 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:53.070 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:53.076 # of the computation is cached by cruncher
2025-07-01 05:49:53.086 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:53.098 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:53.109 cruncher.ratio() > best_ratio:
2025-07-01 05:49:53.123 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:53.135 if best_ratio < cutoff:
2025-07-01 05:49:53.143 # no non-identical "pretty close" pair
2025-07-01 05:49:53.153 if eqi is None:
2025-07-01 05:49:53.166 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:53.176 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:53.188 return
2025-07-01 05:49:53.199 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:53.207 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:53.214 else:
2025-07-01 05:49:53.220 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:53.226 eqi = None
2025-07-01 05:49:53.236
2025-07-01 05:49:53.247 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:53.254 # identical
2025-07-01 05:49:53.264
2025-07-01 05:49:53.273 # pump out diffs from before the synch point
2025-07-01 05:49:53.281 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:53.287
2025-07-01 05:49:53.294 # do intraline marking on the synch pair
2025-07-01 05:49:53.300 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:53.306 if eqi is None:
2025-07-01 05:49:53.312 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:53.318 atags = btags = ""
2025-07-01 05:49:53.323 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:53.337 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:53.345 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:53.351 if tag == 'replace':
2025-07-01 05:49:53.360 atags += '^' * la
2025-07-01 05:49:53.370 btags += '^' * lb
2025-07-01 05:49:53.376 elif tag == 'delete':
2025-07-01 05:49:53.381 atags += '-' * la
2025-07-01 05:49:53.388 elif tag == 'insert':
2025-07-01 05:49:53.393 btags += '+' * lb
2025-07-01 05:49:53.399 elif tag == 'equal':
2025-07-01 05:49:53.404 atags += ' ' * la
2025-07-01 05:49:53.410 btags += ' ' * lb
2025-07-01 05:49:53.415 else:
2025-07-01 05:49:53.422 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:53.428 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:53.434 else:
2025-07-01 05:49:53.446 # the synch pair is identical
2025-07-01 05:49:53.455 yield ' ' + aelt
2025-07-01 05:49:53.463
2025-07-01 05:49:53.471 # pump out diffs from after the synch point
2025-07-01 05:49:53.482 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:53.491
2025-07-01 05:49:53.500 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:53.509 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:53.515
2025-07-01 05:49:53.521 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:53.533 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:53.541 alo = 401, ahi = 1101
2025-07-01 05:49:53.549 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:53.555 blo = 401, bhi = 1101
2025-07-01 05:49:53.568
2025-07-01 05:49:53.577 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:53.585 g = []
2025-07-01 05:49:53.592 if alo < ahi:
2025-07-01 05:49:53.602 if blo < bhi:
2025-07-01 05:49:53.612 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:53.623 else:
2025-07-01 05:49:53.634 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:53.645 elif blo < bhi:
2025-07-01 05:49:53.655 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:53.663
2025-07-01 05:49:53.671 > yield from g
2025-07-01 05:49:53.682
2025-07-01 05:49:53.691 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:53.699 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:53.710
2025-07-01 05:49:53.719 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:53.732 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:53.742 alo = 401, ahi = 1101
2025-07-01 05:49:53.751 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:53.758 blo = 401, bhi = 1101
2025-07-01 05:49:53.768
2025-07-01 05:49:53.778 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:53.787 r"""
2025-07-01 05:49:53.798 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:53.809 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:53.821 synch point, and intraline difference marking is done on the
2025-07-01 05:49:53.832 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:53.843
2025-07-01 05:49:53.852 Example:
2025-07-01 05:49:53.859
2025-07-01 05:49:53.866 >>> d = Differ()
2025-07-01 05:49:53.871 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:53.876 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:53.880 >>> print(''.join(results), end="")
2025-07-01 05:49:53.884 - abcDefghiJkl
2025-07-01 05:49:53.893 + abcdefGhijkl
2025-07-01 05:49:53.902 """
2025-07-01 05:49:53.906
2025-07-01 05:49:53.911 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:53.915 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:53.919 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:53.924 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:53.928 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:53.934
2025-07-01 05:49:53.940 # search for the pair that matches best without being identical
2025-07-01 05:49:53.947 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:53.957 # on junk -- unless we have to)
2025-07-01 05:49:53.968 for j in range(blo, bhi):
2025-07-01 05:49:53.979 bj = b[j]
2025-07-01 05:49:53.992 cruncher.set_seq2(bj)
2025-07-01 05:49:54.002 for i in range(alo, ahi):
2025-07-01 05:49:54.010 ai = a[i]
2025-07-01 05:49:54.019 if ai == bj:
2025-07-01 05:49:54.027 if eqi is None:
2025-07-01 05:49:54.035 eqi, eqj = i, j
2025-07-01 05:49:54.041 continue
2025-07-01 05:49:54.047 cruncher.set_seq1(ai)
2025-07-01 05:49:54.059 # computing similarity is expensive, so use the quick
2025-07-01 05:49:54.072 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:54.082 # compares by a factor of 3.
2025-07-01 05:49:54.093 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:54.103 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:54.112 # of the computation is cached by cruncher
2025-07-01 05:49:54.120 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:54.126 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:54.137 cruncher.ratio() > best_ratio:
2025-07-01 05:49:54.148 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:54.157 if best_ratio < cutoff:
2025-07-01 05:49:54.163 # no non-identical "pretty close" pair
2025-07-01 05:49:54.171 if eqi is None:
2025-07-01 05:49:54.181 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:54.190 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:54.197 return
2025-07-01 05:49:54.211 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:54.221 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:54.231 else:
2025-07-01 05:49:54.243 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:54.252 eqi = None
2025-07-01 05:49:54.259
2025-07-01 05:49:54.266 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:54.276 # identical
2025-07-01 05:49:54.285
2025-07-01 05:49:54.294 # pump out diffs from before the synch point
2025-07-01 05:49:54.305 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:54.316
2025-07-01 05:49:54.326 # do intraline marking on the synch pair
2025-07-01 05:49:54.339 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:54.350 if eqi is None:
2025-07-01 05:49:54.360 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:54.367 atags = btags = ""
2025-07-01 05:49:54.372 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:54.378 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:54.390 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:54.397 if tag == 'replace':
2025-07-01 05:49:54.403 atags += '^' * la
2025-07-01 05:49:54.409 btags += '^' * lb
2025-07-01 05:49:54.414 elif tag == 'delete':
2025-07-01 05:49:54.419 atags += '-' * la
2025-07-01 05:49:54.425 elif tag == 'insert':
2025-07-01 05:49:54.434 btags += '+' * lb
2025-07-01 05:49:54.444 elif tag == 'equal':
2025-07-01 05:49:54.453 atags += ' ' * la
2025-07-01 05:49:54.462 btags += ' ' * lb
2025-07-01 05:49:54.468 else:
2025-07-01 05:49:54.475 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:54.483 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:54.494 else:
2025-07-01 05:49:54.505 # the synch pair is identical
2025-07-01 05:49:54.515 yield ' ' + aelt
2025-07-01 05:49:54.523
2025-07-01 05:49:54.531 # pump out diffs from after the synch point
2025-07-01 05:49:54.539 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:54.549
2025-07-01 05:49:54.558 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:54.565 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:54.570
2025-07-01 05:49:54.575 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:54.580 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:54.585 alo = 402, ahi = 1101
2025-07-01 05:49:54.593 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:54.598 blo = 402, bhi = 1101
2025-07-01 05:49:54.609
2025-07-01 05:49:54.619 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:54.626 g = []
2025-07-01 05:49:54.633 if alo < ahi:
2025-07-01 05:49:54.639 if blo < bhi:
2025-07-01 05:49:54.645 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:54.651 else:
2025-07-01 05:49:54.657 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:54.663 elif blo < bhi:
2025-07-01 05:49:54.668 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:54.674
2025-07-01 05:49:54.679 > yield from g
2025-07-01 05:49:54.684
2025-07-01 05:49:54.691 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:54.699 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:54.707
2025-07-01 05:49:54.714 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:54.720 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:54.726 alo = 402, ahi = 1101
2025-07-01 05:49:54.737 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:54.747 blo = 402, bhi = 1101
2025-07-01 05:49:54.754
2025-07-01 05:49:54.762 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:54.773 r"""
2025-07-01 05:49:54.784 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:54.794 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:54.806 synch point, and intraline difference marking is done on the
2025-07-01 05:49:54.819 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:54.828
2025-07-01 05:49:54.835 Example:
2025-07-01 05:49:54.841
2025-07-01 05:49:54.847 >>> d = Differ()
2025-07-01 05:49:54.853 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:54.859 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:54.864 >>> print(''.join(results), end="")
2025-07-01 05:49:54.869 - abcDefghiJkl
2025-07-01 05:49:54.882 + abcdefGhijkl
2025-07-01 05:49:54.899 """
2025-07-01 05:49:54.906
2025-07-01 05:49:54.913 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:54.919 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:54.924 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:54.930 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:54.935 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:54.941
2025-07-01 05:49:54.951 # search for the pair that matches best without being identical
2025-07-01 05:49:54.957 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:54.965 # on junk -- unless we have to)
2025-07-01 05:49:54.974 for j in range(blo, bhi):
2025-07-01 05:49:54.980 bj = b[j]
2025-07-01 05:49:54.986 cruncher.set_seq2(bj)
2025-07-01 05:49:54.991 for i in range(alo, ahi):
2025-07-01 05:49:54.999 ai = a[i]
2025-07-01 05:49:55.011 if ai == bj:
2025-07-01 05:49:55.022 if eqi is None:
2025-07-01 05:49:55.029 eqi, eqj = i, j
2025-07-01 05:49:55.035 continue
2025-07-01 05:49:55.042 cruncher.set_seq1(ai)
2025-07-01 05:49:55.053 # computing similarity is expensive, so use the quick
2025-07-01 05:49:55.067 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:55.078 # compares by a factor of 3.
2025-07-01 05:49:55.086 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:55.093 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:55.103 # of the computation is cached by cruncher
2025-07-01 05:49:55.111 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:55.118 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:55.123 cruncher.ratio() > best_ratio:
2025-07-01 05:49:55.128 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:55.133 if best_ratio < cutoff:
2025-07-01 05:49:55.138 # no non-identical "pretty close" pair
2025-07-01 05:49:55.148 if eqi is None:
2025-07-01 05:49:55.157 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:55.169 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:55.180 return
2025-07-01 05:49:55.193 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:55.203 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:55.211 else:
2025-07-01 05:49:55.216 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:55.222 eqi = None
2025-07-01 05:49:55.226
2025-07-01 05:49:55.231 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:55.236 # identical
2025-07-01 05:49:55.241
2025-07-01 05:49:55.248 # pump out diffs from before the synch point
2025-07-01 05:49:55.253 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:55.259
2025-07-01 05:49:55.267 # do intraline marking on the synch pair
2025-07-01 05:49:55.277 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:55.287 if eqi is None:
2025-07-01 05:49:55.297 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:55.306 atags = btags = ""
2025-07-01 05:49:55.314 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:55.320 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:55.327 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:55.336 if tag == 'replace':
2025-07-01 05:49:55.346 atags += '^' * la
2025-07-01 05:49:55.353 btags += '^' * lb
2025-07-01 05:49:55.365 elif tag == 'delete':
2025-07-01 05:49:55.373 atags += '-' * la
2025-07-01 05:49:55.380 elif tag == 'insert':
2025-07-01 05:49:55.386 btags += '+' * lb
2025-07-01 05:49:55.391 elif tag == 'equal':
2025-07-01 05:49:55.395 atags += ' ' * la
2025-07-01 05:49:55.400 btags += ' ' * lb
2025-07-01 05:49:55.405 else:
2025-07-01 05:49:55.413 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:55.419 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:55.424 else:
2025-07-01 05:49:55.429 # the synch pair is identical
2025-07-01 05:49:55.434 yield ' ' + aelt
2025-07-01 05:49:55.442
2025-07-01 05:49:55.449 # pump out diffs from after the synch point
2025-07-01 05:49:55.455 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:55.465
2025-07-01 05:49:55.472 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:55.477 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:55.482
2025-07-01 05:49:55.487 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:55.492 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:55.498 alo = 403, ahi = 1101
2025-07-01 05:49:55.504 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:55.510 blo = 403, bhi = 1101
2025-07-01 05:49:55.516
2025-07-01 05:49:55.522 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:55.528 g = []
2025-07-01 05:49:55.534 if alo < ahi:
2025-07-01 05:49:55.541 if blo < bhi:
2025-07-01 05:49:55.548 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:55.555 else:
2025-07-01 05:49:55.562 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:55.568 elif blo < bhi:
2025-07-01 05:49:55.575 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:55.582
2025-07-01 05:49:55.593 > yield from g
2025-07-01 05:49:55.604
2025-07-01 05:49:55.613 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:55.620 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:55.627
2025-07-01 05:49:55.633 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:55.639 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:55.644 alo = 403, ahi = 1101
2025-07-01 05:49:55.652 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:55.664 blo = 403, bhi = 1101
2025-07-01 05:49:55.678
2025-07-01 05:49:55.687 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:55.695 r"""
2025-07-01 05:49:55.702 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:55.710 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:55.717 synch point, and intraline difference marking is done on the
2025-07-01 05:49:55.724 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:55.731
2025-07-01 05:49:55.742 Example:
2025-07-01 05:49:55.749
2025-07-01 05:49:55.756 >>> d = Differ()
2025-07-01 05:49:55.762 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:55.772 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:55.777 >>> print(''.join(results), end="")
2025-07-01 05:49:55.783 - abcDefghiJkl
2025-07-01 05:49:55.794 + abcdefGhijkl
2025-07-01 05:49:55.815 """
2025-07-01 05:49:55.823
2025-07-01 05:49:55.835 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:55.851 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:55.862 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:55.869 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:55.878 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:55.885
2025-07-01 05:49:55.891 # search for the pair that matches best without being identical
2025-07-01 05:49:55.897 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:55.904 # on junk -- unless we have to)
2025-07-01 05:49:55.910 for j in range(blo, bhi):
2025-07-01 05:49:55.929 bj = b[j]
2025-07-01 05:49:55.934 cruncher.set_seq2(bj)
2025-07-01 05:49:55.939 for i in range(alo, ahi):
2025-07-01 05:49:55.944 ai = a[i]
2025-07-01 05:49:55.949 if ai == bj:
2025-07-01 05:49:55.955 if eqi is None:
2025-07-01 05:49:55.961 eqi, eqj = i, j
2025-07-01 05:49:55.967 continue
2025-07-01 05:49:55.978 cruncher.set_seq1(ai)
2025-07-01 05:49:55.991 # computing similarity is expensive, so use the quick
2025-07-01 05:49:56.002 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:56.011 # compares by a factor of 3.
2025-07-01 05:49:56.021 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:56.029 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:56.040 # of the computation is cached by cruncher
2025-07-01 05:49:56.050 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:56.060 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:56.071 cruncher.ratio() > best_ratio:
2025-07-01 05:49:56.080 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:56.088 if best_ratio < cutoff:
2025-07-01 05:49:56.095 # no non-identical "pretty close" pair
2025-07-01 05:49:56.101 if eqi is None:
2025-07-01 05:49:56.107 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:56.114 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:56.119 return
2025-07-01 05:49:56.127 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:56.137 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:56.145 else:
2025-07-01 05:49:56.154 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:56.167 eqi = None
2025-07-01 05:49:56.178
2025-07-01 05:49:56.190 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:56.199 # identical
2025-07-01 05:49:56.211
2025-07-01 05:49:56.223 # pump out diffs from before the synch point
2025-07-01 05:49:56.234 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:56.244
2025-07-01 05:49:56.255 # do intraline marking on the synch pair
2025-07-01 05:49:56.263 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:56.271 if eqi is None:
2025-07-01 05:49:56.283 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:56.292 atags = btags = ""
2025-07-01 05:49:56.300 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:56.308 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:56.314 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:56.320 if tag == 'replace':
2025-07-01 05:49:56.326 atags += '^' * la
2025-07-01 05:49:56.337 btags += '^' * lb
2025-07-01 05:49:56.346 elif tag == 'delete':
2025-07-01 05:49:56.352 atags += '-' * la
2025-07-01 05:49:56.359 elif tag == 'insert':
2025-07-01 05:49:56.365 btags += '+' * lb
2025-07-01 05:49:56.370 elif tag == 'equal':
2025-07-01 05:49:56.376 atags += ' ' * la
2025-07-01 05:49:56.384 btags += ' ' * lb
2025-07-01 05:49:56.390 else:
2025-07-01 05:49:56.397 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:56.405 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:56.412 else:
2025-07-01 05:49:56.418 # the synch pair is identical
2025-07-01 05:49:56.428 yield ' ' + aelt
2025-07-01 05:49:56.440
2025-07-01 05:49:56.450 # pump out diffs from after the synch point
2025-07-01 05:49:56.461 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:56.472
2025-07-01 05:49:56.480 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:56.488 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:56.494
2025-07-01 05:49:56.505 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:56.514 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:56.520 alo = 404, ahi = 1101
2025-07-01 05:49:56.527 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:56.534 blo = 404, bhi = 1101
2025-07-01 05:49:56.545
2025-07-01 05:49:56.554 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:56.561 g = []
2025-07-01 05:49:56.567 if alo < ahi:
2025-07-01 05:49:56.572 if blo < bhi:
2025-07-01 05:49:56.577 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:56.582 else:
2025-07-01 05:49:56.586 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:56.591 elif blo < bhi:
2025-07-01 05:49:56.596 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:56.602
2025-07-01 05:49:56.611 > yield from g
2025-07-01 05:49:56.623
2025-07-01 05:49:56.630 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:56.636 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:56.643
2025-07-01 05:49:56.649 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:56.658 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:56.666 alo = 404, ahi = 1101
2025-07-01 05:49:56.674 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:56.682 blo = 404, bhi = 1101
2025-07-01 05:49:56.691
2025-07-01 05:49:56.699 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:56.705 r"""
2025-07-01 05:49:56.711 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:56.719 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:56.729 synch point, and intraline difference marking is done on the
2025-07-01 05:49:56.737 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:56.744
2025-07-01 05:49:56.749 Example:
2025-07-01 05:49:56.757
2025-07-01 05:49:56.768 >>> d = Differ()
2025-07-01 05:49:56.777 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:56.784 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:56.791 >>> print(''.join(results), end="")
2025-07-01 05:49:56.802 - abcDefghiJkl
2025-07-01 05:49:56.824 + abcdefGhijkl
2025-07-01 05:49:56.844 """
2025-07-01 05:49:56.852
2025-07-01 05:49:56.858 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:56.864 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:56.868 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:56.872 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:56.877 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:56.883
2025-07-01 05:49:56.890 # search for the pair that matches best without being identical
2025-07-01 05:49:56.899 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:56.910 # on junk -- unless we have to)
2025-07-01 05:49:56.918 for j in range(blo, bhi):
2025-07-01 05:49:56.925 bj = b[j]
2025-07-01 05:49:56.931 cruncher.set_seq2(bj)
2025-07-01 05:49:56.936 for i in range(alo, ahi):
2025-07-01 05:49:56.940 ai = a[i]
2025-07-01 05:49:56.945 if ai == bj:
2025-07-01 05:49:56.950 if eqi is None:
2025-07-01 05:49:56.954 eqi, eqj = i, j
2025-07-01 05:49:56.959 continue
2025-07-01 05:49:56.964 cruncher.set_seq1(ai)
2025-07-01 05:49:56.970 # computing similarity is expensive, so use the quick
2025-07-01 05:49:56.976 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:56.982 # compares by a factor of 3.
2025-07-01 05:49:56.990 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:56.997 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:57.003 # of the computation is cached by cruncher
2025-07-01 05:49:57.011 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:57.018 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:57.027 cruncher.ratio() > best_ratio:
2025-07-01 05:49:57.038 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:57.048 if best_ratio < cutoff:
2025-07-01 05:49:57.055 # no non-identical "pretty close" pair
2025-07-01 05:49:57.067 if eqi is None:
2025-07-01 05:49:57.074 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:57.081 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:57.088 return
2025-07-01 05:49:57.095 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:57.102 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:57.111 else:
2025-07-01 05:49:57.122 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:57.133 eqi = None
2025-07-01 05:49:57.141
2025-07-01 05:49:57.147 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:57.153 # identical
2025-07-01 05:49:57.160
2025-07-01 05:49:57.167 # pump out diffs from before the synch point
2025-07-01 05:49:57.175 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:57.185
2025-07-01 05:49:57.195 # do intraline marking on the synch pair
2025-07-01 05:49:57.206 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:57.214 if eqi is None:
2025-07-01 05:49:57.222 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:57.229 atags = btags = ""
2025-07-01 05:49:57.236 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:57.244 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:57.251 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:57.258 if tag == 'replace':
2025-07-01 05:49:57.265 atags += '^' * la
2025-07-01 05:49:57.272 btags += '^' * lb
2025-07-01 05:49:57.279 elif tag == 'delete':
2025-07-01 05:49:57.286 atags += '-' * la
2025-07-01 05:49:57.296 elif tag == 'insert':
2025-07-01 05:49:57.304 btags += '+' * lb
2025-07-01 05:49:57.311 elif tag == 'equal':
2025-07-01 05:49:57.317 atags += ' ' * la
2025-07-01 05:49:57.322 btags += ' ' * lb
2025-07-01 05:49:57.327 else:
2025-07-01 05:49:57.331 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:57.339 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:57.345 else:
2025-07-01 05:49:57.351 # the synch pair is identical
2025-07-01 05:49:57.357 yield ' ' + aelt
2025-07-01 05:49:57.365
2025-07-01 05:49:57.371 # pump out diffs from after the synch point
2025-07-01 05:49:57.380 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:57.385
2025-07-01 05:49:57.391 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:57.397 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:57.403
2025-07-01 05:49:57.408 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:57.415 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:57.420 alo = 405, ahi = 1101
2025-07-01 05:49:57.427 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:57.435 blo = 405, bhi = 1101
2025-07-01 05:49:57.448
2025-07-01 05:49:57.462 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:57.473 g = []
2025-07-01 05:49:57.480 if alo < ahi:
2025-07-01 05:49:57.490 if blo < bhi:
2025-07-01 05:49:57.501 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:57.512 else:
2025-07-01 05:49:57.523 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:57.532 elif blo < bhi:
2025-07-01 05:49:57.540 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:57.547
2025-07-01 05:49:57.559 > yield from g
2025-07-01 05:49:57.570
2025-07-01 05:49:57.580 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:57.589 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:57.595
2025-07-01 05:49:57.601 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:57.608 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:57.620 alo = 405, ahi = 1101
2025-07-01 05:49:57.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:57.639 blo = 405, bhi = 1101
2025-07-01 05:49:57.647
2025-07-01 05:49:57.654 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:57.665 r"""
2025-07-01 05:49:57.677 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:57.687 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:57.695 synch point, and intraline difference marking is done on the
2025-07-01 05:49:57.702 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:57.708
2025-07-01 05:49:57.715 Example:
2025-07-01 05:49:57.720
2025-07-01 05:49:57.726 >>> d = Differ()
2025-07-01 05:49:57.734 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:57.745 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:57.757 >>> print(''.join(results), end="")
2025-07-01 05:49:57.768 - abcDefghiJkl
2025-07-01 05:49:57.790 + abcdefGhijkl
2025-07-01 05:49:57.806 """
2025-07-01 05:49:57.815
2025-07-01 05:49:57.825 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:57.833 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:57.844 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:57.854 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:57.862 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:57.867
2025-07-01 05:49:57.871 # search for the pair that matches best without being identical
2025-07-01 05:49:57.876 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:57.880 # on junk -- unless we have to)
2025-07-01 05:49:57.885 for j in range(blo, bhi):
2025-07-01 05:49:57.889 bj = b[j]
2025-07-01 05:49:57.893 cruncher.set_seq2(bj)
2025-07-01 05:49:57.897 for i in range(alo, ahi):
2025-07-01 05:49:57.902 ai = a[i]
2025-07-01 05:49:57.906 if ai == bj:
2025-07-01 05:49:57.910 if eqi is None:
2025-07-01 05:49:57.915 eqi, eqj = i, j
2025-07-01 05:49:57.919 continue
2025-07-01 05:49:57.923 cruncher.set_seq1(ai)
2025-07-01 05:49:57.932 # computing similarity is expensive, so use the quick
2025-07-01 05:49:57.937 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:57.943 # compares by a factor of 3.
2025-07-01 05:49:57.947 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:57.952 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:57.956 # of the computation is cached by cruncher
2025-07-01 05:49:57.961 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:57.965 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:57.970 cruncher.ratio() > best_ratio:
2025-07-01 05:49:57.982 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:57.994 if best_ratio < cutoff:
2025-07-01 05:49:58.005 # no non-identical "pretty close" pair
2025-07-01 05:49:58.015 if eqi is None:
2025-07-01 05:49:58.026 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:58.035 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:58.043 return
2025-07-01 05:49:58.056 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:58.068 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:58.076 else:
2025-07-01 05:49:58.082 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:58.088 eqi = None
2025-07-01 05:49:58.093
2025-07-01 05:49:58.100 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:58.107 # identical
2025-07-01 05:49:58.114
2025-07-01 05:49:58.125 # pump out diffs from before the synch point
2025-07-01 05:49:58.137 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:58.150
2025-07-01 05:49:58.160 # do intraline marking on the synch pair
2025-07-01 05:49:58.170 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:58.179 if eqi is None:
2025-07-01 05:49:58.188 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:58.195 atags = btags = ""
2025-07-01 05:49:58.202 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:58.213 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:58.223 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:58.234 if tag == 'replace':
2025-07-01 05:49:58.241 atags += '^' * la
2025-07-01 05:49:58.248 btags += '^' * lb
2025-07-01 05:49:58.255 elif tag == 'delete':
2025-07-01 05:49:58.263 atags += '-' * la
2025-07-01 05:49:58.273 elif tag == 'insert':
2025-07-01 05:49:58.282 btags += '+' * lb
2025-07-01 05:49:58.290 elif tag == 'equal':
2025-07-01 05:49:58.300 atags += ' ' * la
2025-07-01 05:49:58.310 btags += ' ' * lb
2025-07-01 05:49:58.319 else:
2025-07-01 05:49:58.331 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:58.341 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:58.349 else:
2025-07-01 05:49:58.356 # the synch pair is identical
2025-07-01 05:49:58.366 yield ' ' + aelt
2025-07-01 05:49:58.376
2025-07-01 05:49:58.384 # pump out diffs from after the synch point
2025-07-01 05:49:58.393 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:58.403
2025-07-01 05:49:58.409 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:58.415 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:58.422
2025-07-01 05:49:58.428 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:58.434 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:58.440 alo = 406, ahi = 1101
2025-07-01 05:49:58.446 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:58.454 blo = 406, bhi = 1101
2025-07-01 05:49:58.466
2025-07-01 05:49:58.473 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:58.479 g = []
2025-07-01 05:49:58.485 if alo < ahi:
2025-07-01 05:49:58.491 if blo < bhi:
2025-07-01 05:49:58.497 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:58.503 else:
2025-07-01 05:49:58.509 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:58.514 elif blo < bhi:
2025-07-01 05:49:58.524 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:58.530
2025-07-01 05:49:58.536 > yield from g
2025-07-01 05:49:58.540
2025-07-01 05:49:58.550 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:58.557 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:58.563
2025-07-01 05:49:58.571 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:58.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:58.591 alo = 406, ahi = 1101
2025-07-01 05:49:58.604 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:58.613 blo = 406, bhi = 1101
2025-07-01 05:49:58.620
2025-07-01 05:49:58.627 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:58.633 r"""
2025-07-01 05:49:58.639 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:58.646 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:58.653 synch point, and intraline difference marking is done on the
2025-07-01 05:49:58.658 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:58.665
2025-07-01 05:49:58.670 Example:
2025-07-01 05:49:58.676
2025-07-01 05:49:58.683 >>> d = Differ()
2025-07-01 05:49:58.690 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:58.699 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:58.714 >>> print(''.join(results), end="")
2025-07-01 05:49:58.725 - abcDefghiJkl
2025-07-01 05:49:58.738 + abcdefGhijkl
2025-07-01 05:49:58.751 """
2025-07-01 05:49:58.756
2025-07-01 05:49:58.761 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:58.768 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:58.774 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:58.780 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:58.787 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:58.795
2025-07-01 05:49:58.802 # search for the pair that matches best without being identical
2025-07-01 05:49:58.809 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:58.815 # on junk -- unless we have to)
2025-07-01 05:49:58.821 for j in range(blo, bhi):
2025-07-01 05:49:58.827 bj = b[j]
2025-07-01 05:49:58.833 cruncher.set_seq2(bj)
2025-07-01 05:49:58.840 for i in range(alo, ahi):
2025-07-01 05:49:58.846 ai = a[i]
2025-07-01 05:49:58.853 if ai == bj:
2025-07-01 05:49:58.861 if eqi is None:
2025-07-01 05:49:58.868 eqi, eqj = i, j
2025-07-01 05:49:58.874 continue
2025-07-01 05:49:58.880 cruncher.set_seq1(ai)
2025-07-01 05:49:58.886 # computing similarity is expensive, so use the quick
2025-07-01 05:49:58.893 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:58.898 # compares by a factor of 3.
2025-07-01 05:49:58.904 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:58.910 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:58.920 # of the computation is cached by cruncher
2025-07-01 05:49:58.930 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:58.937 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:58.943 cruncher.ratio() > best_ratio:
2025-07-01 05:49:58.950 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:58.958 if best_ratio < cutoff:
2025-07-01 05:49:58.965 # no non-identical "pretty close" pair
2025-07-01 05:49:58.974 if eqi is None:
2025-07-01 05:49:58.981 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:58.988 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:58.995 return
2025-07-01 05:49:59.001 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:59.007 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:59.013 else:
2025-07-01 05:49:59.020 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:49:59.026 eqi = None
2025-07-01 05:49:59.031
2025-07-01 05:49:59.039 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:49:59.049 # identical
2025-07-01 05:49:59.060
2025-07-01 05:49:59.070 # pump out diffs from before the synch point
2025-07-01 05:49:59.081 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:49:59.090
2025-07-01 05:49:59.099 # do intraline marking on the synch pair
2025-07-01 05:49:59.110 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:49:59.118 if eqi is None:
2025-07-01 05:49:59.130 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:49:59.139 atags = btags = ""
2025-07-01 05:49:59.144 cruncher.set_seqs(aelt, belt)
2025-07-01 05:49:59.156 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:49:59.168 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:49:59.176 if tag == 'replace':
2025-07-01 05:49:59.184 atags += '^' * la
2025-07-01 05:49:59.194 btags += '^' * lb
2025-07-01 05:49:59.206 elif tag == 'delete':
2025-07-01 05:49:59.215 atags += '-' * la
2025-07-01 05:49:59.224 elif tag == 'insert':
2025-07-01 05:49:59.231 btags += '+' * lb
2025-07-01 05:49:59.239 elif tag == 'equal':
2025-07-01 05:49:59.251 atags += ' ' * la
2025-07-01 05:49:59.259 btags += ' ' * lb
2025-07-01 05:49:59.265 else:
2025-07-01 05:49:59.271 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:49:59.282 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:49:59.291 else:
2025-07-01 05:49:59.301 # the synch pair is identical
2025-07-01 05:49:59.315 yield ' ' + aelt
2025-07-01 05:49:59.325
2025-07-01 05:49:59.339 # pump out diffs from after the synch point
2025-07-01 05:49:59.349 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:49:59.356
2025-07-01 05:49:59.362 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:49:59.367 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:59.378
2025-07-01 05:49:59.385 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:59.392 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:59.398 alo = 407, ahi = 1101
2025-07-01 05:49:59.412 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:59.423 blo = 407, bhi = 1101
2025-07-01 05:49:59.434
2025-07-01 05:49:59.444 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:59.453 g = []
2025-07-01 05:49:59.459 if alo < ahi:
2025-07-01 05:49:59.467 if blo < bhi:
2025-07-01 05:49:59.476 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:59.484 else:
2025-07-01 05:49:59.491 g = self._dump('-', a, alo, ahi)
2025-07-01 05:49:59.497 elif blo < bhi:
2025-07-01 05:49:59.506 g = self._dump('+', b, blo, bhi)
2025-07-01 05:49:59.514
2025-07-01 05:49:59.522 > yield from g
2025-07-01 05:49:59.529
2025-07-01 05:49:59.536 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:49:59.543 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:49:59.551
2025-07-01 05:49:59.559 self = <difflib.Differ object at [hex]>
2025-07-01 05:49:59.571 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:49:59.580 alo = 407, ahi = 1101
2025-07-01 05:49:59.587 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:49:59.593 blo = 407, bhi = 1101
2025-07-01 05:49:59.599
2025-07-01 05:49:59.605 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:49:59.611 r"""
2025-07-01 05:49:59.617 When replacing one block of lines with another, search the blocks
2025-07-01 05:49:59.624 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:49:59.630 synch point, and intraline difference marking is done on the
2025-07-01 05:49:59.639 similar pair. Lots of work, but often worth it.
2025-07-01 05:49:59.651
2025-07-01 05:49:59.660 Example:
2025-07-01 05:49:59.667
2025-07-01 05:49:59.674 >>> d = Differ()
2025-07-01 05:49:59.679 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:49:59.684 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:49:59.691 >>> print(''.join(results), end="")
2025-07-01 05:49:59.698 - abcDefghiJkl
2025-07-01 05:49:59.711 + abcdefGhijkl
2025-07-01 05:49:59.729 """
2025-07-01 05:49:59.737
2025-07-01 05:49:59.743 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:49:59.749 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:49:59.755 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:49:59.760 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:49:59.766 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:49:59.772
2025-07-01 05:49:59.778 # search for the pair that matches best without being identical
2025-07-01 05:49:59.785 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:49:59.791 # on junk -- unless we have to)
2025-07-01 05:49:59.799 for j in range(blo, bhi):
2025-07-01 05:49:59.810 bj = b[j]
2025-07-01 05:49:59.818 cruncher.set_seq2(bj)
2025-07-01 05:49:59.824 for i in range(alo, ahi):
2025-07-01 05:49:59.830 ai = a[i]
2025-07-01 05:49:59.834 if ai == bj:
2025-07-01 05:49:59.839 if eqi is None:
2025-07-01 05:49:59.844 eqi, eqj = i, j
2025-07-01 05:49:59.848 continue
2025-07-01 05:49:59.853 cruncher.set_seq1(ai)
2025-07-01 05:49:59.858 # computing similarity is expensive, so use the quick
2025-07-01 05:49:59.864 # upper bounds first -- have seen this speed up messy
2025-07-01 05:49:59.869 # compares by a factor of 3.
2025-07-01 05:49:59.875 # note that ratio() is only expensive to compute the first
2025-07-01 05:49:59.881 # time it's called on a sequence pair; the expensive part
2025-07-01 05:49:59.887 # of the computation is cached by cruncher
2025-07-01 05:49:59.893 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:49:59.900 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:49:59.907 cruncher.ratio() > best_ratio:
2025-07-01 05:49:59.915 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:49:59.926 if best_ratio < cutoff:
2025-07-01 05:49:59.935 # no non-identical "pretty close" pair
2025-07-01 05:49:59.942 if eqi is None:
2025-07-01 05:49:59.948 # no identical pair either -- treat it as a straight replace
2025-07-01 05:49:59.953 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:49:59.959 return
2025-07-01 05:49:59.967 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:49:59.977 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:49:59.985 else:
2025-07-01 05:49:59.997 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:00.007 eqi = None
2025-07-01 05:50:00.013
2025-07-01 05:50:00.019 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:00.031 # identical
2025-07-01 05:50:00.042
2025-07-01 05:50:00.052 # pump out diffs from before the synch point
2025-07-01 05:50:00.062 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:00.070
2025-07-01 05:50:00.080 # do intraline marking on the synch pair
2025-07-01 05:50:00.091 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:00.099 if eqi is None:
2025-07-01 05:50:00.106 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:00.116 atags = btags = ""
2025-07-01 05:50:00.126 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:00.135 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:00.143 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:00.151 if tag == 'replace':
2025-07-01 05:50:00.162 atags += '^' * la
2025-07-01 05:50:00.172 btags += '^' * lb
2025-07-01 05:50:00.180 elif tag == 'delete':
2025-07-01 05:50:00.187 atags += '-' * la
2025-07-01 05:50:00.195 elif tag == 'insert':
2025-07-01 05:50:00.206 btags += '+' * lb
2025-07-01 05:50:00.215 elif tag == 'equal':
2025-07-01 05:50:00.223 atags += ' ' * la
2025-07-01 05:50:00.233 btags += ' ' * lb
2025-07-01 05:50:00.242 else:
2025-07-01 05:50:00.250 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:00.259 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:00.270 else:
2025-07-01 05:50:00.282 # the synch pair is identical
2025-07-01 05:50:00.294 yield ' ' + aelt
2025-07-01 05:50:00.306
2025-07-01 05:50:00.316 # pump out diffs from after the synch point
2025-07-01 05:50:00.325 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:00.331
2025-07-01 05:50:00.337 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:00.344 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:00.350
2025-07-01 05:50:00.360 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:00.370 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:00.381 alo = 408, ahi = 1101
2025-07-01 05:50:00.391 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:00.398 blo = 408, bhi = 1101
2025-07-01 05:50:00.404
2025-07-01 05:50:00.410 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:00.415 g = []
2025-07-01 05:50:00.423 if alo < ahi:
2025-07-01 05:50:00.435 if blo < bhi:
2025-07-01 05:50:00.445 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:00.452 else:
2025-07-01 05:50:00.459 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:00.464 elif blo < bhi:
2025-07-01 05:50:00.470 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:00.480
2025-07-01 05:50:00.489 > yield from g
2025-07-01 05:50:00.496
2025-07-01 05:50:00.503 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:00.510 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:00.519
2025-07-01 05:50:00.529 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:00.538 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:00.547 alo = 408, ahi = 1101
2025-07-01 05:50:00.556 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:00.562 blo = 408, bhi = 1101
2025-07-01 05:50:00.568
2025-07-01 05:50:00.574 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:00.579 r"""
2025-07-01 05:50:00.587 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:00.593 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:00.599 synch point, and intraline difference marking is done on the
2025-07-01 05:50:00.607 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:00.619
2025-07-01 05:50:00.627 Example:
2025-07-01 05:50:00.637
2025-07-01 05:50:00.648 >>> d = Differ()
2025-07-01 05:50:00.659 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:00.668 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:00.676 >>> print(''.join(results), end="")
2025-07-01 05:50:00.683 - abcDefghiJkl
2025-07-01 05:50:00.694 + abcdefGhijkl
2025-07-01 05:50:00.714 """
2025-07-01 05:50:00.722
2025-07-01 05:50:00.730 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:00.738 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:00.749 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:00.761 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:00.770 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:00.777
2025-07-01 05:50:00.786 # search for the pair that matches best without being identical
2025-07-01 05:50:00.793 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:00.799 # on junk -- unless we have to)
2025-07-01 05:50:00.807 for j in range(blo, bhi):
2025-07-01 05:50:00.814 bj = b[j]
2025-07-01 05:50:00.821 cruncher.set_seq2(bj)
2025-07-01 05:50:00.831 for i in range(alo, ahi):
2025-07-01 05:50:00.842 ai = a[i]
2025-07-01 05:50:00.853 if ai == bj:
2025-07-01 05:50:00.864 if eqi is None:
2025-07-01 05:50:00.873 eqi, eqj = i, j
2025-07-01 05:50:00.885 continue
2025-07-01 05:50:00.895 cruncher.set_seq1(ai)
2025-07-01 05:50:00.904 # computing similarity is expensive, so use the quick
2025-07-01 05:50:00.913 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:00.920 # compares by a factor of 3.
2025-07-01 05:50:00.926 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:00.931 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:00.936 # of the computation is cached by cruncher
2025-07-01 05:50:00.941 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:00.945 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:00.950 cruncher.ratio() > best_ratio:
2025-07-01 05:50:00.954 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:00.960 if best_ratio < cutoff:
2025-07-01 05:50:00.971 # no non-identical "pretty close" pair
2025-07-01 05:50:00.978 if eqi is None:
2025-07-01 05:50:00.985 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:00.992 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:00.998 return
2025-07-01 05:50:01.008 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:01.016 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:01.023 else:
2025-07-01 05:50:01.029 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:01.033 eqi = None
2025-07-01 05:50:01.038
2025-07-01 05:50:01.043 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:01.048 # identical
2025-07-01 05:50:01.052
2025-07-01 05:50:01.057 # pump out diffs from before the synch point
2025-07-01 05:50:01.062 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:01.067
2025-07-01 05:50:01.072 # do intraline marking on the synch pair
2025-07-01 05:50:01.076 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:01.081 if eqi is None:
2025-07-01 05:50:01.086 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:01.090 atags = btags = ""
2025-07-01 05:50:01.095 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:01.100 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:01.107 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:01.112 if tag == 'replace':
2025-07-01 05:50:01.118 atags += '^' * la
2025-07-01 05:50:01.124 btags += '^' * lb
2025-07-01 05:50:01.130 elif tag == 'delete':
2025-07-01 05:50:01.140 atags += '-' * la
2025-07-01 05:50:01.149 elif tag == 'insert':
2025-07-01 05:50:01.155 btags += '+' * lb
2025-07-01 05:50:01.161 elif tag == 'equal':
2025-07-01 05:50:01.167 atags += ' ' * la
2025-07-01 05:50:01.174 btags += ' ' * lb
2025-07-01 05:50:01.185 else:
2025-07-01 05:50:01.194 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:01.201 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:01.208 else:
2025-07-01 05:50:01.214 # the synch pair is identical
2025-07-01 05:50:01.223 yield ' ' + aelt
2025-07-01 05:50:01.234
2025-07-01 05:50:01.243 # pump out diffs from after the synch point
2025-07-01 05:50:01.251 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:01.258
2025-07-01 05:50:01.265 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:01.272 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:01.278
2025-07-01 05:50:01.284 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:01.297 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:01.306 alo = 409, ahi = 1101
2025-07-01 05:50:01.314 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:01.321 blo = 409, bhi = 1101
2025-07-01 05:50:01.328
2025-07-01 05:50:01.335 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:01.344 g = []
2025-07-01 05:50:01.357 if alo < ahi:
2025-07-01 05:50:01.365 if blo < bhi:
2025-07-01 05:50:01.372 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:01.379 else:
2025-07-01 05:50:01.387 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:01.397 elif blo < bhi:
2025-07-01 05:50:01.404 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:01.411
2025-07-01 05:50:01.417 > yield from g
2025-07-01 05:50:01.423
2025-07-01 05:50:01.429 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:01.436 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:01.442
2025-07-01 05:50:01.448 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:01.454 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:01.460 alo = 409, ahi = 1101
2025-07-01 05:50:01.473 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:01.483 blo = 409, bhi = 1101
2025-07-01 05:50:01.490
2025-07-01 05:50:01.496 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:01.503 r"""
2025-07-01 05:50:01.510 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:01.516 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:01.523 synch point, and intraline difference marking is done on the
2025-07-01 05:50:01.529 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:01.536
2025-07-01 05:50:01.542 Example:
2025-07-01 05:50:01.550
2025-07-01 05:50:01.557 >>> d = Differ()
2025-07-01 05:50:01.565 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:01.572 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:01.580 >>> print(''.join(results), end="")
2025-07-01 05:50:01.587 - abcDefghiJkl
2025-07-01 05:50:01.602 + abcdefGhijkl
2025-07-01 05:50:01.616 """
2025-07-01 05:50:01.621
2025-07-01 05:50:01.627 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:01.634 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:01.640 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:01.647 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:01.654 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:01.660
2025-07-01 05:50:01.666 # search for the pair that matches best without being identical
2025-07-01 05:50:01.671 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:01.676 # on junk -- unless we have to)
2025-07-01 05:50:01.681 for j in range(blo, bhi):
2025-07-01 05:50:01.689 bj = b[j]
2025-07-01 05:50:01.701 cruncher.set_seq2(bj)
2025-07-01 05:50:01.709 for i in range(alo, ahi):
2025-07-01 05:50:01.716 ai = a[i]
2025-07-01 05:50:01.723 if ai == bj:
2025-07-01 05:50:01.731 if eqi is None:
2025-07-01 05:50:01.738 eqi, eqj = i, j
2025-07-01 05:50:01.747 continue
2025-07-01 05:50:01.755 cruncher.set_seq1(ai)
2025-07-01 05:50:01.761 # computing similarity is expensive, so use the quick
2025-07-01 05:50:01.769 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:01.779 # compares by a factor of 3.
2025-07-01 05:50:01.790 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:01.797 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:01.805 # of the computation is cached by cruncher
2025-07-01 05:50:01.812 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:01.818 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:01.826 cruncher.ratio() > best_ratio:
2025-07-01 05:50:01.832 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:01.838 if best_ratio < cutoff:
2025-07-01 05:50:01.844 # no non-identical "pretty close" pair
2025-07-01 05:50:01.850 if eqi is None:
2025-07-01 05:50:01.855 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:01.862 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:01.871 return
2025-07-01 05:50:01.878 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:01.885 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:01.892 else:
2025-07-01 05:50:01.899 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:01.905 eqi = None
2025-07-01 05:50:01.911
2025-07-01 05:50:01.919 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:01.932 # identical
2025-07-01 05:50:01.940
2025-07-01 05:50:01.948 # pump out diffs from before the synch point
2025-07-01 05:50:01.955 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:01.963
2025-07-01 05:50:01.976 # do intraline marking on the synch pair
2025-07-01 05:50:01.986 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:01.993 if eqi is None:
2025-07-01 05:50:02.000 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:02.008 atags = btags = ""
2025-07-01 05:50:02.017 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:02.029 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:02.037 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:02.044 if tag == 'replace':
2025-07-01 05:50:02.051 atags += '^' * la
2025-07-01 05:50:02.057 btags += '^' * lb
2025-07-01 05:50:02.064 elif tag == 'delete':
2025-07-01 05:50:02.071 atags += '-' * la
2025-07-01 05:50:02.080 elif tag == 'insert':
2025-07-01 05:50:02.091 btags += '+' * lb
2025-07-01 05:50:02.100 elif tag == 'equal':
2025-07-01 05:50:02.107 atags += ' ' * la
2025-07-01 05:50:02.112 btags += ' ' * lb
2025-07-01 05:50:02.118 else:
2025-07-01 05:50:02.129 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:02.135 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:02.141 else:
2025-07-01 05:50:02.147 # the synch pair is identical
2025-07-01 05:50:02.155 yield ' ' + aelt
2025-07-01 05:50:02.164
2025-07-01 05:50:02.171 # pump out diffs from after the synch point
2025-07-01 05:50:02.177 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:02.182
2025-07-01 05:50:02.192 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:02.200 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:02.207
2025-07-01 05:50:02.219 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:02.228 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:02.236 alo = 410, ahi = 1101
2025-07-01 05:50:02.244 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:02.254 blo = 410, bhi = 1101
2025-07-01 05:50:02.267
2025-07-01 05:50:02.278 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:02.287 g = []
2025-07-01 05:50:02.295 if alo < ahi:
2025-07-01 05:50:02.308 if blo < bhi:
2025-07-01 05:50:02.316 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:02.323 else:
2025-07-01 05:50:02.330 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:02.337 elif blo < bhi:
2025-07-01 05:50:02.345 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:02.350
2025-07-01 05:50:02.357 > yield from g
2025-07-01 05:50:02.364
2025-07-01 05:50:02.371 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:02.379 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:02.392
2025-07-01 05:50:02.399 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:02.415 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:02.426 alo = 410, ahi = 1101
2025-07-01 05:50:02.437 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:02.448 blo = 410, bhi = 1101
2025-07-01 05:50:02.462
2025-07-01 05:50:02.473 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:02.483 r"""
2025-07-01 05:50:02.490 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:02.497 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:02.510 synch point, and intraline difference marking is done on the
2025-07-01 05:50:02.521 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:02.530
2025-07-01 05:50:02.537 Example:
2025-07-01 05:50:02.544
2025-07-01 05:50:02.550 >>> d = Differ()
2025-07-01 05:50:02.557 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:02.563 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:02.576 >>> print(''.join(results), end="")
2025-07-01 05:50:02.586 - abcDefghiJkl
2025-07-01 05:50:02.611 + abcdefGhijkl
2025-07-01 05:50:02.629 """
2025-07-01 05:50:02.641
2025-07-01 05:50:02.652 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:02.663 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:02.673 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:02.681 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:02.688 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:02.694
2025-07-01 05:50:02.704 # search for the pair that matches best without being identical
2025-07-01 05:50:02.714 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:02.721 # on junk -- unless we have to)
2025-07-01 05:50:02.728 for j in range(blo, bhi):
2025-07-01 05:50:02.735 bj = b[j]
2025-07-01 05:50:02.741 cruncher.set_seq2(bj)
2025-07-01 05:50:02.746 for i in range(alo, ahi):
2025-07-01 05:50:02.751 ai = a[i]
2025-07-01 05:50:02.756 if ai == bj:
2025-07-01 05:50:02.761 if eqi is None:
2025-07-01 05:50:02.770 eqi, eqj = i, j
2025-07-01 05:50:02.780 continue
2025-07-01 05:50:02.792 cruncher.set_seq1(ai)
2025-07-01 05:50:02.804 # computing similarity is expensive, so use the quick
2025-07-01 05:50:02.815 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:02.823 # compares by a factor of 3.
2025-07-01 05:50:02.835 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:02.847 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:02.859 # of the computation is cached by cruncher
2025-07-01 05:50:02.868 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:02.876 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:02.883 cruncher.ratio() > best_ratio:
2025-07-01 05:50:02.889 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:02.895 if best_ratio < cutoff:
2025-07-01 05:50:02.901 # no non-identical "pretty close" pair
2025-07-01 05:50:02.907 if eqi is None:
2025-07-01 05:50:02.915 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:02.926 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:02.935 return
2025-07-01 05:50:02.942 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:02.949 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:02.955 else:
2025-07-01 05:50:02.960 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:02.965 eqi = None
2025-07-01 05:50:02.970
2025-07-01 05:50:02.976 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:02.982 # identical
2025-07-01 05:50:02.994
2025-07-01 05:50:03.002 # pump out diffs from before the synch point
2025-07-01 05:50:03.009 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:03.016
2025-07-01 05:50:03.025 # do intraline marking on the synch pair
2025-07-01 05:50:03.037 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:03.047 if eqi is None:
2025-07-01 05:50:03.054 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:03.061 atags = btags = ""
2025-07-01 05:50:03.067 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:03.074 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:03.086 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:03.097 if tag == 'replace':
2025-07-01 05:50:03.107 atags += '^' * la
2025-07-01 05:50:03.117 btags += '^' * lb
2025-07-01 05:50:03.124 elif tag == 'delete':
2025-07-01 05:50:03.130 atags += '-' * la
2025-07-01 05:50:03.139 elif tag == 'insert':
2025-07-01 05:50:03.149 btags += '+' * lb
2025-07-01 05:50:03.156 elif tag == 'equal':
2025-07-01 05:50:03.163 atags += ' ' * la
2025-07-01 05:50:03.169 btags += ' ' * lb
2025-07-01 05:50:03.174 else:
2025-07-01 05:50:03.185 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:03.193 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:03.205 else:
2025-07-01 05:50:03.214 # the synch pair is identical
2025-07-01 05:50:03.220 yield ' ' + aelt
2025-07-01 05:50:03.227
2025-07-01 05:50:03.234 # pump out diffs from after the synch point
2025-07-01 05:50:03.244 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:03.253
2025-07-01 05:50:03.261 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:03.268 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:03.274
2025-07-01 05:50:03.285 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:03.294 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:03.302 alo = 411, ahi = 1101
2025-07-01 05:50:03.310 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:03.317 blo = 411, bhi = 1101
2025-07-01 05:50:03.324
2025-07-01 05:50:03.331 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:03.337 g = []
2025-07-01 05:50:03.344 if alo < ahi:
2025-07-01 05:50:03.351 if blo < bhi:
2025-07-01 05:50:03.360 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:03.372 else:
2025-07-01 05:50:03.380 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:03.387 elif blo < bhi:
2025-07-01 05:50:03.395 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:03.405
2025-07-01 05:50:03.413 > yield from g
2025-07-01 05:50:03.421
2025-07-01 05:50:03.428 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:03.434 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:03.440
2025-07-01 05:50:03.447 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:03.455 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:03.462 alo = 411, ahi = 1101
2025-07-01 05:50:03.471 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:03.482 blo = 411, bhi = 1101
2025-07-01 05:50:03.490
2025-07-01 05:50:03.496 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:03.502 r"""
2025-07-01 05:50:03.507 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:03.513 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:03.520 synch point, and intraline difference marking is done on the
2025-07-01 05:50:03.526 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:03.534
2025-07-01 05:50:03.542 Example:
2025-07-01 05:50:03.547
2025-07-01 05:50:03.553 >>> d = Differ()
2025-07-01 05:50:03.558 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:03.562 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:03.567 >>> print(''.join(results), end="")
2025-07-01 05:50:03.572 - abcDefghiJkl
2025-07-01 05:50:03.583 + abcdefGhijkl
2025-07-01 05:50:03.594 """
2025-07-01 05:50:03.600
2025-07-01 05:50:03.605 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:03.611 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:03.617 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:03.628 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:03.637 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:03.644
2025-07-01 05:50:03.653 # search for the pair that matches best without being identical
2025-07-01 05:50:03.659 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:03.666 # on junk -- unless we have to)
2025-07-01 05:50:03.672 for j in range(blo, bhi):
2025-07-01 05:50:03.678 bj = b[j]
2025-07-01 05:50:03.685 cruncher.set_seq2(bj)
2025-07-01 05:50:03.691 for i in range(alo, ahi):
2025-07-01 05:50:03.696 ai = a[i]
2025-07-01 05:50:03.702 if ai == bj:
2025-07-01 05:50:03.710 if eqi is None:
2025-07-01 05:50:03.717 eqi, eqj = i, j
2025-07-01 05:50:03.726 continue
2025-07-01 05:50:03.737 cruncher.set_seq1(ai)
2025-07-01 05:50:03.745 # computing similarity is expensive, so use the quick
2025-07-01 05:50:03.752 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:03.759 # compares by a factor of 3.
2025-07-01 05:50:03.765 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:03.771 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:03.779 # of the computation is cached by cruncher
2025-07-01 05:50:03.789 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:03.798 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:03.808 cruncher.ratio() > best_ratio:
2025-07-01 05:50:03.817 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:03.823 if best_ratio < cutoff:
2025-07-01 05:50:03.828 # no non-identical "pretty close" pair
2025-07-01 05:50:03.833 if eqi is None:
2025-07-01 05:50:03.838 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:03.843 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:03.849 return
2025-07-01 05:50:03.855 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:03.862 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:03.868 else:
2025-07-01 05:50:03.875 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:03.880 eqi = None
2025-07-01 05:50:03.887
2025-07-01 05:50:03.896 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:03.903 # identical
2025-07-01 05:50:03.909
2025-07-01 05:50:03.922 # pump out diffs from before the synch point
2025-07-01 05:50:03.931 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:03.939
2025-07-01 05:50:03.946 # do intraline marking on the synch pair
2025-07-01 05:50:03.956 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:03.965 if eqi is None:
2025-07-01 05:50:03.971 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:03.981 atags = btags = ""
2025-07-01 05:50:03.992 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:04.001 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:04.012 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:04.022 if tag == 'replace':
2025-07-01 05:50:04.034 atags += '^' * la
2025-07-01 05:50:04.047 btags += '^' * lb
2025-07-01 05:50:04.056 elif tag == 'delete':
2025-07-01 05:50:04.063 atags += '-' * la
2025-07-01 05:50:04.069 elif tag == 'insert':
2025-07-01 05:50:04.075 btags += '+' * lb
2025-07-01 05:50:04.080 elif tag == 'equal':
2025-07-01 05:50:04.084 atags += ' ' * la
2025-07-01 05:50:04.089 btags += ' ' * lb
2025-07-01 05:50:04.095 else:
2025-07-01 05:50:04.102 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:04.113 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:04.119 else:
2025-07-01 05:50:04.125 # the synch pair is identical
2025-07-01 05:50:04.131 yield ' ' + aelt
2025-07-01 05:50:04.138
2025-07-01 05:50:04.149 # pump out diffs from after the synch point
2025-07-01 05:50:04.157 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:04.163
2025-07-01 05:50:04.169 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:04.174 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:04.180
2025-07-01 05:50:04.186 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:04.192 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:04.197 alo = 412, ahi = 1101
2025-07-01 05:50:04.203 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:04.209 blo = 412, bhi = 1101
2025-07-01 05:50:04.214
2025-07-01 05:50:04.220 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:04.225 g = []
2025-07-01 05:50:04.231 if alo < ahi:
2025-07-01 05:50:04.237 if blo < bhi:
2025-07-01 05:50:04.247 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:04.258 else:
2025-07-01 05:50:04.268 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:04.275 elif blo < bhi:
2025-07-01 05:50:04.282 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:04.290
2025-07-01 05:50:04.297 > yield from g
2025-07-01 05:50:04.304
2025-07-01 05:50:04.311 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:04.318 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:04.323
2025-07-01 05:50:04.330 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:04.337 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:04.345 alo = 412, ahi = 1101
2025-07-01 05:50:04.359 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:04.371 blo = 412, bhi = 1101
2025-07-01 05:50:04.381
2025-07-01 05:50:04.389 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:04.395 r"""
2025-07-01 05:50:04.404 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:04.410 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:04.416 synch point, and intraline difference marking is done on the
2025-07-01 05:50:04.420 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:04.425
2025-07-01 05:50:04.429 Example:
2025-07-01 05:50:04.434
2025-07-01 05:50:04.439 >>> d = Differ()
2025-07-01 05:50:04.443 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:04.456 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:04.466 >>> print(''.join(results), end="")
2025-07-01 05:50:04.475 - abcDefghiJkl
2025-07-01 05:50:04.489 + abcdefGhijkl
2025-07-01 05:50:04.500 """
2025-07-01 05:50:04.505
2025-07-01 05:50:04.512 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:04.517 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:04.522 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:04.530 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:04.542 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:04.550
2025-07-01 05:50:04.560 # search for the pair that matches best without being identical
2025-07-01 05:50:04.568 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:04.579 # on junk -- unless we have to)
2025-07-01 05:50:04.590 for j in range(blo, bhi):
2025-07-01 05:50:04.599 bj = b[j]
2025-07-01 05:50:04.607 cruncher.set_seq2(bj)
2025-07-01 05:50:04.615 for i in range(alo, ahi):
2025-07-01 05:50:04.625 ai = a[i]
2025-07-01 05:50:04.634 if ai == bj:
2025-07-01 05:50:04.640 if eqi is None:
2025-07-01 05:50:04.647 eqi, eqj = i, j
2025-07-01 05:50:04.653 continue
2025-07-01 05:50:04.659 cruncher.set_seq1(ai)
2025-07-01 05:50:04.666 # computing similarity is expensive, so use the quick
2025-07-01 05:50:04.674 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:04.681 # compares by a factor of 3.
2025-07-01 05:50:04.688 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:04.694 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:04.699 # of the computation is cached by cruncher
2025-07-01 05:50:04.705 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:04.712 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:04.718 cruncher.ratio() > best_ratio:
2025-07-01 05:50:04.723 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:04.728 if best_ratio < cutoff:
2025-07-01 05:50:04.733 # no non-identical "pretty close" pair
2025-07-01 05:50:04.739 if eqi is None:
2025-07-01 05:50:04.746 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:04.752 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:04.759 return
2025-07-01 05:50:04.765 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:04.771 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:04.778 else:
2025-07-01 05:50:04.784 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:04.791 eqi = None
2025-07-01 05:50:04.797
2025-07-01 05:50:04.804 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:04.811 # identical
2025-07-01 05:50:04.819
2025-07-01 05:50:04.829 # pump out diffs from before the synch point
2025-07-01 05:50:04.837 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:04.843
2025-07-01 05:50:04.850 # do intraline marking on the synch pair
2025-07-01 05:50:04.857 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:04.863 if eqi is None:
2025-07-01 05:50:04.870 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:04.877 atags = btags = ""
2025-07-01 05:50:04.884 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:04.896 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:04.904 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:04.911 if tag == 'replace':
2025-07-01 05:50:04.917 atags += '^' * la
2025-07-01 05:50:04.922 btags += '^' * lb
2025-07-01 05:50:04.927 elif tag == 'delete':
2025-07-01 05:50:04.932 atags += '-' * la
2025-07-01 05:50:04.938 elif tag == 'insert':
2025-07-01 05:50:04.944 btags += '+' * lb
2025-07-01 05:50:04.951 elif tag == 'equal':
2025-07-01 05:50:04.958 atags += ' ' * la
2025-07-01 05:50:04.969 btags += ' ' * lb
2025-07-01 05:50:04.978 else:
2025-07-01 05:50:04.985 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:04.992 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:04.997 else:
2025-07-01 05:50:05.005 # the synch pair is identical
2025-07-01 05:50:05.011 yield ' ' + aelt
2025-07-01 05:50:05.018
2025-07-01 05:50:05.028 # pump out diffs from after the synch point
2025-07-01 05:50:05.036 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:05.042
2025-07-01 05:50:05.047 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:05.053 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:05.058
2025-07-01 05:50:05.063 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:05.068 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:05.073 alo = 413, ahi = 1101
2025-07-01 05:50:05.078 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:05.083 blo = 413, bhi = 1101
2025-07-01 05:50:05.092
2025-07-01 05:50:05.103 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:05.111 g = []
2025-07-01 05:50:05.118 if alo < ahi:
2025-07-01 05:50:05.124 if blo < bhi:
2025-07-01 05:50:05.129 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:05.134 else:
2025-07-01 05:50:05.140 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:05.145 elif blo < bhi:
2025-07-01 05:50:05.151 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:05.157
2025-07-01 05:50:05.162 > yield from g
2025-07-01 05:50:05.169
2025-07-01 05:50:05.176 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:05.183 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:05.188
2025-07-01 05:50:05.193 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:05.203 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:05.215 alo = 413, ahi = 1101
2025-07-01 05:50:05.223 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:05.231 blo = 413, bhi = 1101
2025-07-01 05:50:05.239
2025-07-01 05:50:05.246 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:05.252 r"""
2025-07-01 05:50:05.259 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:05.265 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:05.272 synch point, and intraline difference marking is done on the
2025-07-01 05:50:05.277 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:05.282
2025-07-01 05:50:05.287 Example:
2025-07-01 05:50:05.292
2025-07-01 05:50:05.298 >>> d = Differ()
2025-07-01 05:50:05.303 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:05.310 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:05.315 >>> print(''.join(results), end="")
2025-07-01 05:50:05.320 - abcDefghiJkl
2025-07-01 05:50:05.331 + abcdefGhijkl
2025-07-01 05:50:05.342 """
2025-07-01 05:50:05.350
2025-07-01 05:50:05.357 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:05.364 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:05.369 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:05.374 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:05.378 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:05.383
2025-07-01 05:50:05.388 # search for the pair that matches best without being identical
2025-07-01 05:50:05.394 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:05.400 # on junk -- unless we have to)
2025-07-01 05:50:05.406 for j in range(blo, bhi):
2025-07-01 05:50:05.413 bj = b[j]
2025-07-01 05:50:05.420 cruncher.set_seq2(bj)
2025-07-01 05:50:05.426 for i in range(alo, ahi):
2025-07-01 05:50:05.431 ai = a[i]
2025-07-01 05:50:05.438 if ai == bj:
2025-07-01 05:50:05.448 if eqi is None:
2025-07-01 05:50:05.459 eqi, eqj = i, j
2025-07-01 05:50:05.469 continue
2025-07-01 05:50:05.477 cruncher.set_seq1(ai)
2025-07-01 05:50:05.484 # computing similarity is expensive, so use the quick
2025-07-01 05:50:05.490 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:05.496 # compares by a factor of 3.
2025-07-01 05:50:05.509 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:05.520 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:05.530 # of the computation is cached by cruncher
2025-07-01 05:50:05.543 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:05.553 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:05.560 cruncher.ratio() > best_ratio:
2025-07-01 05:50:05.567 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:05.573 if best_ratio < cutoff:
2025-07-01 05:50:05.583 # no non-identical "pretty close" pair
2025-07-01 05:50:05.588 if eqi is None:
2025-07-01 05:50:05.594 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:05.599 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:05.604 return
2025-07-01 05:50:05.609 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:05.615 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:05.621 else:
2025-07-01 05:50:05.632 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:05.640 eqi = None
2025-07-01 05:50:05.647
2025-07-01 05:50:05.654 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:05.660 # identical
2025-07-01 05:50:05.666
2025-07-01 05:50:05.675 # pump out diffs from before the synch point
2025-07-01 05:50:05.687 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:05.697
2025-07-01 05:50:05.706 # do intraline marking on the synch pair
2025-07-01 05:50:05.714 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:05.724 if eqi is None:
2025-07-01 05:50:05.734 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:05.742 atags = btags = ""
2025-07-01 05:50:05.751 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:05.759 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:05.769 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:05.777 if tag == 'replace':
2025-07-01 05:50:05.786 atags += '^' * la
2025-07-01 05:50:05.794 btags += '^' * lb
2025-07-01 05:50:05.804 elif tag == 'delete':
2025-07-01 05:50:05.814 atags += '-' * la
2025-07-01 05:50:05.822 elif tag == 'insert':
2025-07-01 05:50:05.829 btags += '+' * lb
2025-07-01 05:50:05.835 elif tag == 'equal':
2025-07-01 05:50:05.841 atags += ' ' * la
2025-07-01 05:50:05.846 btags += ' ' * lb
2025-07-01 05:50:05.852 else:
2025-07-01 05:50:05.859 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:05.866 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:05.877 else:
2025-07-01 05:50:05.886 # the synch pair is identical
2025-07-01 05:50:05.893 yield ' ' + aelt
2025-07-01 05:50:05.898
2025-07-01 05:50:05.904 # pump out diffs from after the synch point
2025-07-01 05:50:05.910 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:05.915
2025-07-01 05:50:05.921 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:05.927 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:05.935
2025-07-01 05:50:05.945 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:05.959 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:05.969 alo = 414, ahi = 1101
2025-07-01 05:50:05.976 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:05.983 blo = 414, bhi = 1101
2025-07-01 05:50:05.988
2025-07-01 05:50:05.993 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:05.997 g = []
2025-07-01 05:50:06.002 if alo < ahi:
2025-07-01 05:50:06.006 if blo < bhi:
2025-07-01 05:50:06.011 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:06.016 else:
2025-07-01 05:50:06.021 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:06.026 elif blo < bhi:
2025-07-01 05:50:06.032 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:06.037
2025-07-01 05:50:06.044 > yield from g
2025-07-01 05:50:06.051
2025-07-01 05:50:06.056 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:06.062 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:06.067
2025-07-01 05:50:06.073 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:06.079 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:06.087 alo = 414, ahi = 1101
2025-07-01 05:50:06.094 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:06.101 blo = 414, bhi = 1101
2025-07-01 05:50:06.107
2025-07-01 05:50:06.113 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:06.118 r"""
2025-07-01 05:50:06.124 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:06.130 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:06.136 synch point, and intraline difference marking is done on the
2025-07-01 05:50:06.142 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:06.152
2025-07-01 05:50:06.161 Example:
2025-07-01 05:50:06.171
2025-07-01 05:50:06.178 >>> d = Differ()
2025-07-01 05:50:06.185 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:06.191 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:06.198 >>> print(''.join(results), end="")
2025-07-01 05:50:06.204 - abcDefghiJkl
2025-07-01 05:50:06.217 + abcdefGhijkl
2025-07-01 05:50:06.230 """
2025-07-01 05:50:06.237
2025-07-01 05:50:06.243 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:06.250 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:06.261 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:06.271 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:06.279 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:06.285
2025-07-01 05:50:06.291 # search for the pair that matches best without being identical
2025-07-01 05:50:06.299 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:06.305 # on junk -- unless we have to)
2025-07-01 05:50:06.312 for j in range(blo, bhi):
2025-07-01 05:50:06.319 bj = b[j]
2025-07-01 05:50:06.327 cruncher.set_seq2(bj)
2025-07-01 05:50:06.336 for i in range(alo, ahi):
2025-07-01 05:50:06.340 ai = a[i]
2025-07-01 05:50:06.345 if ai == bj:
2025-07-01 05:50:06.350 if eqi is None:
2025-07-01 05:50:06.355 eqi, eqj = i, j
2025-07-01 05:50:06.362 continue
2025-07-01 05:50:06.367 cruncher.set_seq1(ai)
2025-07-01 05:50:06.373 # computing similarity is expensive, so use the quick
2025-07-01 05:50:06.379 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:06.385 # compares by a factor of 3.
2025-07-01 05:50:06.397 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:06.407 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:06.415 # of the computation is cached by cruncher
2025-07-01 05:50:06.424 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:06.430 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:06.439 cruncher.ratio() > best_ratio:
2025-07-01 05:50:06.449 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:06.462 if best_ratio < cutoff:
2025-07-01 05:50:06.472 # no non-identical "pretty close" pair
2025-07-01 05:50:06.479 if eqi is None:
2025-07-01 05:50:06.486 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:06.496 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:06.502 return
2025-07-01 05:50:06.508 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:06.514 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:06.523 else:
2025-07-01 05:50:06.533 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:06.541 eqi = None
2025-07-01 05:50:06.547
2025-07-01 05:50:06.552 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:06.561 # identical
2025-07-01 05:50:06.567
2025-07-01 05:50:06.572 # pump out diffs from before the synch point
2025-07-01 05:50:06.581 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:06.589
2025-07-01 05:50:06.594 # do intraline marking on the synch pair
2025-07-01 05:50:06.599 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:06.603 if eqi is None:
2025-07-01 05:50:06.609 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:06.614 atags = btags = ""
2025-07-01 05:50:06.619 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:06.624 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:06.629 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:06.633 if tag == 'replace':
2025-07-01 05:50:06.642 atags += '^' * la
2025-07-01 05:50:06.647 btags += '^' * lb
2025-07-01 05:50:06.652 elif tag == 'delete':
2025-07-01 05:50:06.658 atags += '-' * la
2025-07-01 05:50:06.663 elif tag == 'insert':
2025-07-01 05:50:06.669 btags += '+' * lb
2025-07-01 05:50:06.674 elif tag == 'equal':
2025-07-01 05:50:06.681 atags += ' ' * la
2025-07-01 05:50:06.687 btags += ' ' * lb
2025-07-01 05:50:06.693 else:
2025-07-01 05:50:06.700 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:06.707 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:06.717 else:
2025-07-01 05:50:06.732 # the synch pair is identical
2025-07-01 05:50:06.742 yield ' ' + aelt
2025-07-01 05:50:06.753
2025-07-01 05:50:06.765 # pump out diffs from after the synch point
2025-07-01 05:50:06.773 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:06.781
2025-07-01 05:50:06.790 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:06.799 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:06.807
2025-07-01 05:50:06.819 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:06.827 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:06.834 alo = 415, ahi = 1101
2025-07-01 05:50:06.842 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:06.848 blo = 415, bhi = 1101
2025-07-01 05:50:06.854
2025-07-01 05:50:06.862 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:06.867 g = []
2025-07-01 05:50:06.873 if alo < ahi:
2025-07-01 05:50:06.879 if blo < bhi:
2025-07-01 05:50:06.884 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:06.889 else:
2025-07-01 05:50:06.894 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:06.899 elif blo < bhi:
2025-07-01 05:50:06.905 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:06.910
2025-07-01 05:50:06.918 > yield from g
2025-07-01 05:50:06.925
2025-07-01 05:50:06.931 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:06.938 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:06.944
2025-07-01 05:50:06.950 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:06.956 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:06.962 alo = 415, ahi = 1101
2025-07-01 05:50:06.971 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:06.982 blo = 415, bhi = 1101
2025-07-01 05:50:06.993
2025-07-01 05:50:07.003 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:07.009 r"""
2025-07-01 05:50:07.016 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:07.023 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:07.029 synch point, and intraline difference marking is done on the
2025-07-01 05:50:07.034 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:07.038
2025-07-01 05:50:07.043 Example:
2025-07-01 05:50:07.049
2025-07-01 05:50:07.055 >>> d = Differ()
2025-07-01 05:50:07.061 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:07.068 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:07.075 >>> print(''.join(results), end="")
2025-07-01 05:50:07.081 - abcDefghiJkl
2025-07-01 05:50:07.091 + abcdefGhijkl
2025-07-01 05:50:07.100 """
2025-07-01 05:50:07.105
2025-07-01 05:50:07.109 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:07.114 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:07.119 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:07.125 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:07.130 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:07.135
2025-07-01 05:50:07.140 # search for the pair that matches best without being identical
2025-07-01 05:50:07.146 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:07.157 # on junk -- unless we have to)
2025-07-01 05:50:07.167 for j in range(blo, bhi):
2025-07-01 05:50:07.180 bj = b[j]
2025-07-01 05:50:07.192 cruncher.set_seq2(bj)
2025-07-01 05:50:07.199 for i in range(alo, ahi):
2025-07-01 05:50:07.210 ai = a[i]
2025-07-01 05:50:07.222 if ai == bj:
2025-07-01 05:50:07.234 if eqi is None:
2025-07-01 05:50:07.244 eqi, eqj = i, j
2025-07-01 05:50:07.257 continue
2025-07-01 05:50:07.268 cruncher.set_seq1(ai)
2025-07-01 05:50:07.279 # computing similarity is expensive, so use the quick
2025-07-01 05:50:07.287 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:07.295 # compares by a factor of 3.
2025-07-01 05:50:07.309 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:07.318 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:07.327 # of the computation is cached by cruncher
2025-07-01 05:50:07.336 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:07.345 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:07.355 cruncher.ratio() > best_ratio:
2025-07-01 05:50:07.362 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:07.370 if best_ratio < cutoff:
2025-07-01 05:50:07.376 # no non-identical "pretty close" pair
2025-07-01 05:50:07.382 if eqi is None:
2025-07-01 05:50:07.387 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:07.393 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:07.399 return
2025-07-01 05:50:07.405 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:07.411 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:07.417 else:
2025-07-01 05:50:07.424 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:07.430 eqi = None
2025-07-01 05:50:07.435
2025-07-01 05:50:07.440 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:07.445 # identical
2025-07-01 05:50:07.451
2025-07-01 05:50:07.456 # pump out diffs from before the synch point
2025-07-01 05:50:07.461 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:07.466
2025-07-01 05:50:07.471 # do intraline marking on the synch pair
2025-07-01 05:50:07.476 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:07.480 if eqi is None:
2025-07-01 05:50:07.485 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:07.490 atags = btags = ""
2025-07-01 05:50:07.496 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:07.501 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:07.505 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:07.510 if tag == 'replace':
2025-07-01 05:50:07.515 atags += '^' * la
2025-07-01 05:50:07.520 btags += '^' * lb
2025-07-01 05:50:07.526 elif tag == 'delete':
2025-07-01 05:50:07.532 atags += '-' * la
2025-07-01 05:50:07.539 elif tag == 'insert':
2025-07-01 05:50:07.546 btags += '+' * lb
2025-07-01 05:50:07.555 elif tag == 'equal':
2025-07-01 05:50:07.565 atags += ' ' * la
2025-07-01 05:50:07.575 btags += ' ' * lb
2025-07-01 05:50:07.582 else:
2025-07-01 05:50:07.589 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:07.596 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:07.603 else:
2025-07-01 05:50:07.609 # the synch pair is identical
2025-07-01 05:50:07.616 yield ' ' + aelt
2025-07-01 05:50:07.622
2025-07-01 05:50:07.634 # pump out diffs from after the synch point
2025-07-01 05:50:07.645 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:07.653
2025-07-01 05:50:07.661 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:07.668 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:07.681
2025-07-01 05:50:07.692 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:07.702 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:07.710 alo = 416, ahi = 1101
2025-07-01 05:50:07.722 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:07.733 blo = 416, bhi = 1101
2025-07-01 05:50:07.745
2025-07-01 05:50:07.753 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:07.763 g = []
2025-07-01 05:50:07.769 if alo < ahi:
2025-07-01 05:50:07.776 if blo < bhi:
2025-07-01 05:50:07.782 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:07.788 else:
2025-07-01 05:50:07.794 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:07.800 elif blo < bhi:
2025-07-01 05:50:07.809 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:07.815
2025-07-01 05:50:07.823 > yield from g
2025-07-01 05:50:07.835
2025-07-01 05:50:07.843 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:07.850 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:07.862
2025-07-01 05:50:07.872 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:07.879 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:07.886 alo = 416, ahi = 1101
2025-07-01 05:50:07.893 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:07.899 blo = 416, bhi = 1101
2025-07-01 05:50:07.907
2025-07-01 05:50:07.917 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:07.926 r"""
2025-07-01 05:50:07.932 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:07.938 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:07.943 synch point, and intraline difference marking is done on the
2025-07-01 05:50:07.947 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:07.952
2025-07-01 05:50:07.957 Example:
2025-07-01 05:50:07.962
2025-07-01 05:50:07.968 >>> d = Differ()
2025-07-01 05:50:07.973 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:07.979 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:07.985 >>> print(''.join(results), end="")
2025-07-01 05:50:07.991 - abcDefghiJkl
2025-07-01 05:50:08.010 + abcdefGhijkl
2025-07-01 05:50:08.024 """
2025-07-01 05:50:08.033
2025-07-01 05:50:08.046 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:08.056 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:08.068 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:08.079 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:08.091 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:08.098
2025-07-01 05:50:08.104 # search for the pair that matches best without being identical
2025-07-01 05:50:08.109 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:08.114 # on junk -- unless we have to)
2025-07-01 05:50:08.124 for j in range(blo, bhi):
2025-07-01 05:50:08.135 bj = b[j]
2025-07-01 05:50:08.144 cruncher.set_seq2(bj)
2025-07-01 05:50:08.151 for i in range(alo, ahi):
2025-07-01 05:50:08.159 ai = a[i]
2025-07-01 05:50:08.166 if ai == bj:
2025-07-01 05:50:08.178 if eqi is None:
2025-07-01 05:50:08.189 eqi, eqj = i, j
2025-07-01 05:50:08.195 continue
2025-07-01 05:50:08.208 cruncher.set_seq1(ai)
2025-07-01 05:50:08.217 # computing similarity is expensive, so use the quick
2025-07-01 05:50:08.223 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:08.228 # compares by a factor of 3.
2025-07-01 05:50:08.234 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:08.244 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:08.255 # of the computation is cached by cruncher
2025-07-01 05:50:08.267 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:08.279 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:08.288 cruncher.ratio() > best_ratio:
2025-07-01 05:50:08.299 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:08.309 if best_ratio < cutoff:
2025-07-01 05:50:08.318 # no non-identical "pretty close" pair
2025-07-01 05:50:08.327 if eqi is None:
2025-07-01 05:50:08.339 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:08.348 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:08.356 return
2025-07-01 05:50:08.363 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:08.371 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:08.382 else:
2025-07-01 05:50:08.390 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:08.396 eqi = None
2025-07-01 05:50:08.402
2025-07-01 05:50:08.409 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:08.415 # identical
2025-07-01 05:50:08.421
2025-07-01 05:50:08.428 # pump out diffs from before the synch point
2025-07-01 05:50:08.436 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:08.447
2025-07-01 05:50:08.459 # do intraline marking on the synch pair
2025-07-01 05:50:08.470 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:08.483 if eqi is None:
2025-07-01 05:50:08.492 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:08.500 atags = btags = ""
2025-07-01 05:50:08.508 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:08.518 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:08.525 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:08.531 if tag == 'replace':
2025-07-01 05:50:08.538 atags += '^' * la
2025-07-01 05:50:08.547 btags += '^' * lb
2025-07-01 05:50:08.554 elif tag == 'delete':
2025-07-01 05:50:08.561 atags += '-' * la
2025-07-01 05:50:08.568 elif tag == 'insert':
2025-07-01 05:50:08.573 btags += '+' * lb
2025-07-01 05:50:08.578 elif tag == 'equal':
2025-07-01 05:50:08.583 atags += ' ' * la
2025-07-01 05:50:08.588 btags += ' ' * lb
2025-07-01 05:50:08.594 else:
2025-07-01 05:50:08.600 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:08.607 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:08.614 else:
2025-07-01 05:50:08.624 # the synch pair is identical
2025-07-01 05:50:08.634 yield ' ' + aelt
2025-07-01 05:50:08.642
2025-07-01 05:50:08.648 # pump out diffs from after the synch point
2025-07-01 05:50:08.655 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:08.662
2025-07-01 05:50:08.674 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:08.684 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:08.693
2025-07-01 05:50:08.700 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:08.712 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:08.718 alo = 417, ahi = 1101
2025-07-01 05:50:08.725 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:08.731 blo = 417, bhi = 1101
2025-07-01 05:50:08.739
2025-07-01 05:50:08.751 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:08.758 g = []
2025-07-01 05:50:08.766 if alo < ahi:
2025-07-01 05:50:08.776 if blo < bhi:
2025-07-01 05:50:08.790 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:08.799 else:
2025-07-01 05:50:08.806 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:08.817 elif blo < bhi:
2025-07-01 05:50:08.827 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:08.835
2025-07-01 05:50:08.846 > yield from g
2025-07-01 05:50:08.854
2025-07-01 05:50:08.861 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:08.872 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:08.881
2025-07-01 05:50:08.889 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:08.897 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:08.903 alo = 417, ahi = 1101
2025-07-01 05:50:08.916 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:08.926 blo = 417, bhi = 1101
2025-07-01 05:50:08.935
2025-07-01 05:50:08.946 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:08.955 r"""
2025-07-01 05:50:08.963 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:08.971 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:08.979 synch point, and intraline difference marking is done on the
2025-07-01 05:50:08.990 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:09.001
2025-07-01 05:50:09.011 Example:
2025-07-01 05:50:09.022
2025-07-01 05:50:09.035 >>> d = Differ()
2025-07-01 05:50:09.046 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:09.054 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:09.061 >>> print(''.join(results), end="")
2025-07-01 05:50:09.070 - abcDefghiJkl
2025-07-01 05:50:09.094 + abcdefGhijkl
2025-07-01 05:50:09.112 """
2025-07-01 05:50:09.119
2025-07-01 05:50:09.127 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:09.139 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:09.149 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:09.157 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:09.164 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:09.170
2025-07-01 05:50:09.179 # search for the pair that matches best without being identical
2025-07-01 05:50:09.191 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:09.200 # on junk -- unless we have to)
2025-07-01 05:50:09.207 for j in range(blo, bhi):
2025-07-01 05:50:09.219 bj = b[j]
2025-07-01 05:50:09.226 cruncher.set_seq2(bj)
2025-07-01 05:50:09.232 for i in range(alo, ahi):
2025-07-01 05:50:09.238 ai = a[i]
2025-07-01 05:50:09.245 if ai == bj:
2025-07-01 05:50:09.251 if eqi is None:
2025-07-01 05:50:09.259 eqi, eqj = i, j
2025-07-01 05:50:09.268 continue
2025-07-01 05:50:09.276 cruncher.set_seq1(ai)
2025-07-01 05:50:09.282 # computing similarity is expensive, so use the quick
2025-07-01 05:50:09.288 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:09.294 # compares by a factor of 3.
2025-07-01 05:50:09.299 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:09.305 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:09.312 # of the computation is cached by cruncher
2025-07-01 05:50:09.319 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:09.326 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:09.333 cruncher.ratio() > best_ratio:
2025-07-01 05:50:09.341 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:09.353 if best_ratio < cutoff:
2025-07-01 05:50:09.362 # no non-identical "pretty close" pair
2025-07-01 05:50:09.368 if eqi is None:
2025-07-01 05:50:09.374 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:09.380 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:09.386 return
2025-07-01 05:50:09.393 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:09.402 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:09.412 else:
2025-07-01 05:50:09.423 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:09.431 eqi = None
2025-07-01 05:50:09.441
2025-07-01 05:50:09.453 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:09.464 # identical
2025-07-01 05:50:09.476
2025-07-01 05:50:09.487 # pump out diffs from before the synch point
2025-07-01 05:50:09.495 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:09.503
2025-07-01 05:50:09.510 # do intraline marking on the synch pair
2025-07-01 05:50:09.520 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:09.531 if eqi is None:
2025-07-01 05:50:09.539 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:09.546 atags = btags = ""
2025-07-01 05:50:09.552 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:09.561 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:09.573 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:09.584 if tag == 'replace':
2025-07-01 05:50:09.594 atags += '^' * la
2025-07-01 05:50:09.606 btags += '^' * lb
2025-07-01 05:50:09.617 elif tag == 'delete':
2025-07-01 05:50:09.624 atags += '-' * la
2025-07-01 05:50:09.631 elif tag == 'insert':
2025-07-01 05:50:09.638 btags += '+' * lb
2025-07-01 05:50:09.644 elif tag == 'equal':
2025-07-01 05:50:09.650 atags += ' ' * la
2025-07-01 05:50:09.662 btags += ' ' * lb
2025-07-01 05:50:09.671 else:
2025-07-01 05:50:09.677 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:09.684 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:09.690 else:
2025-07-01 05:50:09.699 # the synch pair is identical
2025-07-01 05:50:09.709 yield ' ' + aelt
2025-07-01 05:50:09.720
2025-07-01 05:50:09.729 # pump out diffs from after the synch point
2025-07-01 05:50:09.736 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:09.741
2025-07-01 05:50:09.746 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:09.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:09.756
2025-07-01 05:50:09.761 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:09.776 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:09.789 alo = 418, ahi = 1101
2025-07-01 05:50:09.800 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:09.811 blo = 418, bhi = 1101
2025-07-01 05:50:09.818
2025-07-01 05:50:09.824 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:09.830 g = []
2025-07-01 05:50:09.841 if alo < ahi:
2025-07-01 05:50:09.852 if blo < bhi:
2025-07-01 05:50:09.863 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:09.874 else:
2025-07-01 05:50:09.884 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:09.893 elif blo < bhi:
2025-07-01 05:50:09.903 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:09.913
2025-07-01 05:50:09.922 > yield from g
2025-07-01 05:50:09.932
2025-07-01 05:50:09.943 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:09.952 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:09.959
2025-07-01 05:50:09.967 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:09.979 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:09.987 alo = 418, ahi = 1101
2025-07-01 05:50:09.995 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:10.002 blo = 418, bhi = 1101
2025-07-01 05:50:10.014
2025-07-01 05:50:10.024 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:10.033 r"""
2025-07-01 05:50:10.040 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:10.047 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:10.054 synch point, and intraline difference marking is done on the
2025-07-01 05:50:10.061 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:10.069
2025-07-01 05:50:10.076 Example:
2025-07-01 05:50:10.081
2025-07-01 05:50:10.086 >>> d = Differ()
2025-07-01 05:50:10.095 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:10.108 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:10.119 >>> print(''.join(results), end="")
2025-07-01 05:50:10.126 - abcDefghiJkl
2025-07-01 05:50:10.144 + abcdefGhijkl
2025-07-01 05:50:10.161 """
2025-07-01 05:50:10.167
2025-07-01 05:50:10.173 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:10.184 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:10.194 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:10.201 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:10.207 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:10.213
2025-07-01 05:50:10.223 # search for the pair that matches best without being identical
2025-07-01 05:50:10.229 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:10.235 # on junk -- unless we have to)
2025-07-01 05:50:10.241 for j in range(blo, bhi):
2025-07-01 05:50:10.247 bj = b[j]
2025-07-01 05:50:10.254 cruncher.set_seq2(bj)
2025-07-01 05:50:10.260 for i in range(alo, ahi):
2025-07-01 05:50:10.267 ai = a[i]
2025-07-01 05:50:10.274 if ai == bj:
2025-07-01 05:50:10.290 if eqi is None:
2025-07-01 05:50:10.298 eqi, eqj = i, j
2025-07-01 05:50:10.307 continue
2025-07-01 05:50:10.314 cruncher.set_seq1(ai)
2025-07-01 05:50:10.321 # computing similarity is expensive, so use the quick
2025-07-01 05:50:10.326 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:10.331 # compares by a factor of 3.
2025-07-01 05:50:10.337 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:10.342 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:10.348 # of the computation is cached by cruncher
2025-07-01 05:50:10.355 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:10.361 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:10.368 cruncher.ratio() > best_ratio:
2025-07-01 05:50:10.375 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:10.382 if best_ratio < cutoff:
2025-07-01 05:50:10.389 # no non-identical "pretty close" pair
2025-07-01 05:50:10.396 if eqi is None:
2025-07-01 05:50:10.405 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:10.417 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:10.425 return
2025-07-01 05:50:10.431 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:10.438 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:10.443 else:
2025-07-01 05:50:10.451 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:10.462 eqi = None
2025-07-01 05:50:10.472
2025-07-01 05:50:10.481 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:10.489 # identical
2025-07-01 05:50:10.496
2025-07-01 05:50:10.503 # pump out diffs from before the synch point
2025-07-01 05:50:10.511 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:10.522
2025-07-01 05:50:10.530 # do intraline marking on the synch pair
2025-07-01 05:50:10.537 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:10.543 if eqi is None:
2025-07-01 05:50:10.550 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:10.557 atags = btags = ""
2025-07-01 05:50:10.563 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:10.571 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:10.582 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:10.591 if tag == 'replace':
2025-07-01 05:50:10.599 atags += '^' * la
2025-07-01 05:50:10.608 btags += '^' * lb
2025-07-01 05:50:10.614 elif tag == 'delete':
2025-07-01 05:50:10.620 atags += '-' * la
2025-07-01 05:50:10.626 elif tag == 'insert':
2025-07-01 05:50:10.637 btags += '+' * lb
2025-07-01 05:50:10.646 elif tag == 'equal':
2025-07-01 05:50:10.654 atags += ' ' * la
2025-07-01 05:50:10.663 btags += ' ' * lb
2025-07-01 05:50:10.673 else:
2025-07-01 05:50:10.681 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:10.687 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:10.693 else:
2025-07-01 05:50:10.699 # the synch pair is identical
2025-07-01 05:50:10.705 yield ' ' + aelt
2025-07-01 05:50:10.711
2025-07-01 05:50:10.717 # pump out diffs from after the synch point
2025-07-01 05:50:10.723 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:10.732
2025-07-01 05:50:10.744 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:10.754 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:10.765
2025-07-01 05:50:10.776 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:10.785 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:10.792 alo = 419, ahi = 1101
2025-07-01 05:50:10.804 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:10.812 blo = 419, bhi = 1101
2025-07-01 05:50:10.820
2025-07-01 05:50:10.828 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:10.834 g = []
2025-07-01 05:50:10.841 if alo < ahi:
2025-07-01 05:50:10.847 if blo < bhi:
2025-07-01 05:50:10.853 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:10.859 else:
2025-07-01 05:50:10.866 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:10.874 elif blo < bhi:
2025-07-01 05:50:10.881 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:10.887
2025-07-01 05:50:10.892 > yield from g
2025-07-01 05:50:10.897
2025-07-01 05:50:10.902 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:10.907 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:10.912
2025-07-01 05:50:10.918 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:10.925 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:10.932 alo = 419, ahi = 1101
2025-07-01 05:50:10.941 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:10.946 blo = 419, bhi = 1101
2025-07-01 05:50:10.951
2025-07-01 05:50:10.956 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:10.961 r"""
2025-07-01 05:50:10.965 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:10.970 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:10.974 synch point, and intraline difference marking is done on the
2025-07-01 05:50:10.979 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:10.983
2025-07-01 05:50:10.988 Example:
2025-07-01 05:50:10.992
2025-07-01 05:50:10.997 >>> d = Differ()
2025-07-01 05:50:11.001 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:11.006 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:11.010 >>> print(''.join(results), end="")
2025-07-01 05:50:11.016 - abcDefghiJkl
2025-07-01 05:50:11.027 + abcdefGhijkl
2025-07-01 05:50:11.038 """
2025-07-01 05:50:11.047
2025-07-01 05:50:11.057 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:11.064 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:11.070 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:11.075 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:11.080 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:11.085
2025-07-01 05:50:11.090 # search for the pair that matches best without being identical
2025-07-01 05:50:11.096 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:11.101 # on junk -- unless we have to)
2025-07-01 05:50:11.108 for j in range(blo, bhi):
2025-07-01 05:50:11.114 bj = b[j]
2025-07-01 05:50:11.120 cruncher.set_seq2(bj)
2025-07-01 05:50:11.126 for i in range(alo, ahi):
2025-07-01 05:50:11.133 ai = a[i]
2025-07-01 05:50:11.140 if ai == bj:
2025-07-01 05:50:11.147 if eqi is None:
2025-07-01 05:50:11.154 eqi, eqj = i, j
2025-07-01 05:50:11.164 continue
2025-07-01 05:50:11.175 cruncher.set_seq1(ai)
2025-07-01 05:50:11.181 # computing similarity is expensive, so use the quick
2025-07-01 05:50:11.186 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:11.192 # compares by a factor of 3.
2025-07-01 05:50:11.198 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:11.204 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:11.210 # of the computation is cached by cruncher
2025-07-01 05:50:11.222 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:11.233 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:11.241 cruncher.ratio() > best_ratio:
2025-07-01 05:50:11.248 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:11.258 if best_ratio < cutoff:
2025-07-01 05:50:11.268 # no non-identical "pretty close" pair
2025-07-01 05:50:11.277 if eqi is None:
2025-07-01 05:50:11.288 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:11.298 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:11.309 return
2025-07-01 05:50:11.319 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:11.332 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:11.344 else:
2025-07-01 05:50:11.356 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:11.365 eqi = None
2025-07-01 05:50:11.372
2025-07-01 05:50:11.378 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:11.383 # identical
2025-07-01 05:50:11.388
2025-07-01 05:50:11.397 # pump out diffs from before the synch point
2025-07-01 05:50:11.409 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:11.419
2025-07-01 05:50:11.430 # do intraline marking on the synch pair
2025-07-01 05:50:11.444 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:11.457 if eqi is None:
2025-07-01 05:50:11.466 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:11.472 atags = btags = ""
2025-07-01 05:50:11.478 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:11.484 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:11.490 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:11.496 if tag == 'replace':
2025-07-01 05:50:11.502 atags += '^' * la
2025-07-01 05:50:11.511 btags += '^' * lb
2025-07-01 05:50:11.518 elif tag == 'delete':
2025-07-01 05:50:11.524 atags += '-' * la
2025-07-01 05:50:11.530 elif tag == 'insert':
2025-07-01 05:50:11.535 btags += '+' * lb
2025-07-01 05:50:11.541 elif tag == 'equal':
2025-07-01 05:50:11.547 atags += ' ' * la
2025-07-01 05:50:11.557 btags += ' ' * lb
2025-07-01 05:50:11.565 else:
2025-07-01 05:50:11.572 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:11.580 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:11.586 else:
2025-07-01 05:50:11.592 # the synch pair is identical
2025-07-01 05:50:11.598 yield ' ' + aelt
2025-07-01 05:50:11.605
2025-07-01 05:50:11.611 # pump out diffs from after the synch point
2025-07-01 05:50:11.622 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:11.634
2025-07-01 05:50:11.644 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:11.653 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:11.659
2025-07-01 05:50:11.665 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:11.671 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:11.677 alo = 422, ahi = 1101
2025-07-01 05:50:11.685 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:11.690 blo = 422, bhi = 1101
2025-07-01 05:50:11.697
2025-07-01 05:50:11.704 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:11.711 g = []
2025-07-01 05:50:11.718 if alo < ahi:
2025-07-01 05:50:11.726 if blo < bhi:
2025-07-01 05:50:11.733 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:11.740 else:
2025-07-01 05:50:11.746 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:11.753 elif blo < bhi:
2025-07-01 05:50:11.760 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:11.767
2025-07-01 05:50:11.773 > yield from g
2025-07-01 05:50:11.780
2025-07-01 05:50:11.787 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:11.794 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:11.800
2025-07-01 05:50:11.807 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:11.815 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:11.822 alo = 422, ahi = 1101
2025-07-01 05:50:11.830 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:11.837 blo = 422, bhi = 1101
2025-07-01 05:50:11.844
2025-07-01 05:50:11.851 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:11.858 r"""
2025-07-01 05:50:11.865 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:11.876 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:11.887 synch point, and intraline difference marking is done on the
2025-07-01 05:50:11.894 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:11.901
2025-07-01 05:50:11.912 Example:
2025-07-01 05:50:11.919
2025-07-01 05:50:11.925 >>> d = Differ()
2025-07-01 05:50:11.936 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:11.947 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:11.959 >>> print(''.join(results), end="")
2025-07-01 05:50:11.972 - abcDefghiJkl
2025-07-01 05:50:11.995 + abcdefGhijkl
2025-07-01 05:50:12.018 """
2025-07-01 05:50:12.031
2025-07-01 05:50:12.042 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:12.055 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:12.065 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:12.077 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:12.089 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:12.097
2025-07-01 05:50:12.107 # search for the pair that matches best without being identical
2025-07-01 05:50:12.112 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:12.117 # on junk -- unless we have to)
2025-07-01 05:50:12.122 for j in range(blo, bhi):
2025-07-01 05:50:12.127 bj = b[j]
2025-07-01 05:50:12.132 cruncher.set_seq2(bj)
2025-07-01 05:50:12.138 for i in range(alo, ahi):
2025-07-01 05:50:12.148 ai = a[i]
2025-07-01 05:50:12.155 if ai == bj:
2025-07-01 05:50:12.162 if eqi is None:
2025-07-01 05:50:12.168 eqi, eqj = i, j
2025-07-01 05:50:12.174 continue
2025-07-01 05:50:12.178 cruncher.set_seq1(ai)
2025-07-01 05:50:12.184 # computing similarity is expensive, so use the quick
2025-07-01 05:50:12.191 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:12.200 # compares by a factor of 3.
2025-07-01 05:50:12.212 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:12.224 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:12.232 # of the computation is cached by cruncher
2025-07-01 05:50:12.240 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:12.248 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:12.255 cruncher.ratio() > best_ratio:
2025-07-01 05:50:12.263 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:12.271 if best_ratio < cutoff:
2025-07-01 05:50:12.281 # no non-identical "pretty close" pair
2025-07-01 05:50:12.290 if eqi is None:
2025-07-01 05:50:12.297 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:12.303 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:12.308 return
2025-07-01 05:50:12.313 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:12.318 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:12.322 else:
2025-07-01 05:50:12.327 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:12.333 eqi = None
2025-07-01 05:50:12.346
2025-07-01 05:50:12.355 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:12.365 # identical
2025-07-01 05:50:12.375
2025-07-01 05:50:12.387 # pump out diffs from before the synch point
2025-07-01 05:50:12.397 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:12.408
2025-07-01 05:50:12.418 # do intraline marking on the synch pair
2025-07-01 05:50:12.428 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:12.440 if eqi is None:
2025-07-01 05:50:12.454 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:12.464 atags = btags = ""
2025-07-01 05:50:12.474 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:12.485 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:12.495 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:12.503 if tag == 'replace':
2025-07-01 05:50:12.510 atags += '^' * la
2025-07-01 05:50:12.522 btags += '^' * lb
2025-07-01 05:50:12.530 elif tag == 'delete':
2025-07-01 05:50:12.536 atags += '-' * la
2025-07-01 05:50:12.541 elif tag == 'insert':
2025-07-01 05:50:12.546 btags += '+' * lb
2025-07-01 05:50:12.555 elif tag == 'equal':
2025-07-01 05:50:12.566 atags += ' ' * la
2025-07-01 05:50:12.573 btags += ' ' * lb
2025-07-01 05:50:12.580 else:
2025-07-01 05:50:12.586 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:12.592 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:12.596 else:
2025-07-01 05:50:12.601 # the synch pair is identical
2025-07-01 05:50:12.607 yield ' ' + aelt
2025-07-01 05:50:12.615
2025-07-01 05:50:12.626 # pump out diffs from after the synch point
2025-07-01 05:50:12.637 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:12.649
2025-07-01 05:50:12.656 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:12.662 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:12.670
2025-07-01 05:50:12.682 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:12.693 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:12.700 alo = 423, ahi = 1101
2025-07-01 05:50:12.708 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:12.714 blo = 423, bhi = 1101
2025-07-01 05:50:12.722
2025-07-01 05:50:12.733 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:12.743 g = []
2025-07-01 05:50:12.753 if alo < ahi:
2025-07-01 05:50:12.763 if blo < bhi:
2025-07-01 05:50:12.772 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:12.780 else:
2025-07-01 05:50:12.789 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:12.796 elif blo < bhi:
2025-07-01 05:50:12.801 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:12.806
2025-07-01 05:50:12.815 > yield from g
2025-07-01 05:50:12.823
2025-07-01 05:50:12.829 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:12.836 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:12.841
2025-07-01 05:50:12.846 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:12.855 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:12.864 alo = 423, ahi = 1101
2025-07-01 05:50:12.873 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:12.882 blo = 423, bhi = 1101
2025-07-01 05:50:12.895
2025-07-01 05:50:12.906 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:12.912 r"""
2025-07-01 05:50:12.919 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:12.926 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:12.937 synch point, and intraline difference marking is done on the
2025-07-01 05:50:12.945 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:12.952
2025-07-01 05:50:12.959 Example:
2025-07-01 05:50:12.968
2025-07-01 05:50:12.977 >>> d = Differ()
2025-07-01 05:50:12.983 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:12.988 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:12.993 >>> print(''.join(results), end="")
2025-07-01 05:50:12.998 - abcDefghiJkl
2025-07-01 05:50:13.009 + abcdefGhijkl
2025-07-01 05:50:13.019 """
2025-07-01 05:50:13.024
2025-07-01 05:50:13.029 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:13.033 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:13.038 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:13.045 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:13.051 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:13.058
2025-07-01 05:50:13.072 # search for the pair that matches best without being identical
2025-07-01 05:50:13.084 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:13.092 # on junk -- unless we have to)
2025-07-01 05:50:13.099 for j in range(blo, bhi):
2025-07-01 05:50:13.104 bj = b[j]
2025-07-01 05:50:13.110 cruncher.set_seq2(bj)
2025-07-01 05:50:13.121 for i in range(alo, ahi):
2025-07-01 05:50:13.130 ai = a[i]
2025-07-01 05:50:13.138 if ai == bj:
2025-07-01 05:50:13.144 if eqi is None:
2025-07-01 05:50:13.151 eqi, eqj = i, j
2025-07-01 05:50:13.157 continue
2025-07-01 05:50:13.163 cruncher.set_seq1(ai)
2025-07-01 05:50:13.169 # computing similarity is expensive, so use the quick
2025-07-01 05:50:13.176 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:13.183 # compares by a factor of 3.
2025-07-01 05:50:13.193 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:13.204 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:13.213 # of the computation is cached by cruncher
2025-07-01 05:50:13.219 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:13.227 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:13.233 cruncher.ratio() > best_ratio:
2025-07-01 05:50:13.239 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:13.245 if best_ratio < cutoff:
2025-07-01 05:50:13.251 # no non-identical "pretty close" pair
2025-07-01 05:50:13.258 if eqi is None:
2025-07-01 05:50:13.269 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:13.278 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:13.285 return
2025-07-01 05:50:13.291 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:13.296 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:13.301 else:
2025-07-01 05:50:13.306 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:13.311 eqi = None
2025-07-01 05:50:13.316
2025-07-01 05:50:13.322 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:13.328 # identical
2025-07-01 05:50:13.334
2025-07-01 05:50:13.346 # pump out diffs from before the synch point
2025-07-01 05:50:13.355 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:13.361
2025-07-01 05:50:13.367 # do intraline marking on the synch pair
2025-07-01 05:50:13.373 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:13.379 if eqi is None:
2025-07-01 05:50:13.386 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:13.392 atags = btags = ""
2025-07-01 05:50:13.399 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:13.406 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:13.415 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:13.426 if tag == 'replace':
2025-07-01 05:50:13.435 atags += '^' * la
2025-07-01 05:50:13.441 btags += '^' * lb
2025-07-01 05:50:13.447 elif tag == 'delete':
2025-07-01 05:50:13.453 atags += '-' * la
2025-07-01 05:50:13.459 elif tag == 'insert':
2025-07-01 05:50:13.464 btags += '+' * lb
2025-07-01 05:50:13.470 elif tag == 'equal':
2025-07-01 05:50:13.476 atags += ' ' * la
2025-07-01 05:50:13.482 btags += ' ' * lb
2025-07-01 05:50:13.487 else:
2025-07-01 05:50:13.493 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:13.499 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:13.505 else:
2025-07-01 05:50:13.510 # the synch pair is identical
2025-07-01 05:50:13.514 yield ' ' + aelt
2025-07-01 05:50:13.519
2025-07-01 05:50:13.524 # pump out diffs from after the synch point
2025-07-01 05:50:13.530 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:13.536
2025-07-01 05:50:13.541 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:13.546 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:13.556
2025-07-01 05:50:13.566 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:13.576 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:13.586 alo = 424, ahi = 1101
2025-07-01 05:50:13.596 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:13.603 blo = 424, bhi = 1101
2025-07-01 05:50:13.615
2025-07-01 05:50:13.625 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:13.636 g = []
2025-07-01 05:50:13.647 if alo < ahi:
2025-07-01 05:50:13.656 if blo < bhi:
2025-07-01 05:50:13.664 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:13.671 else:
2025-07-01 05:50:13.677 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:13.683 elif blo < bhi:
2025-07-01 05:50:13.689 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:13.694
2025-07-01 05:50:13.700 > yield from g
2025-07-01 05:50:13.707
2025-07-01 05:50:13.716 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:13.725 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:13.733
2025-07-01 05:50:13.740 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:13.747 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:13.759 alo = 424, ahi = 1101
2025-07-01 05:50:13.770 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:13.780 blo = 424, bhi = 1101
2025-07-01 05:50:13.791
2025-07-01 05:50:13.802 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:13.812 r"""
2025-07-01 05:50:13.823 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:13.831 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:13.838 synch point, and intraline difference marking is done on the
2025-07-01 05:50:13.845 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:13.851
2025-07-01 05:50:13.859 Example:
2025-07-01 05:50:13.869
2025-07-01 05:50:13.876 >>> d = Differ()
2025-07-01 05:50:13.883 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:13.895 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:13.906 >>> print(''.join(results), end="")
2025-07-01 05:50:13.914 - abcDefghiJkl
2025-07-01 05:50:13.929 + abcdefGhijkl
2025-07-01 05:50:13.944 """
2025-07-01 05:50:13.950
2025-07-01 05:50:13.956 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:13.962 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:13.969 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:13.980 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:13.990 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:14.000
2025-07-01 05:50:14.013 # search for the pair that matches best without being identical
2025-07-01 05:50:14.021 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:14.032 # on junk -- unless we have to)
2025-07-01 05:50:14.043 for j in range(blo, bhi):
2025-07-01 05:50:14.051 bj = b[j]
2025-07-01 05:50:14.059 cruncher.set_seq2(bj)
2025-07-01 05:50:14.066 for i in range(alo, ahi):
2025-07-01 05:50:14.077 ai = a[i]
2025-07-01 05:50:14.090 if ai == bj:
2025-07-01 05:50:14.099 if eqi is None:
2025-07-01 05:50:14.107 eqi, eqj = i, j
2025-07-01 05:50:14.120 continue
2025-07-01 05:50:14.132 cruncher.set_seq1(ai)
2025-07-01 05:50:14.142 # computing similarity is expensive, so use the quick
2025-07-01 05:50:14.154 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:14.164 # compares by a factor of 3.
2025-07-01 05:50:14.177 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:14.188 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:14.197 # of the computation is cached by cruncher
2025-07-01 05:50:14.210 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:14.222 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:14.231 cruncher.ratio() > best_ratio:
2025-07-01 05:50:14.239 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:14.252 if best_ratio < cutoff:
2025-07-01 05:50:14.261 # no non-identical "pretty close" pair
2025-07-01 05:50:14.273 if eqi is None:
2025-07-01 05:50:14.283 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:14.293 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:14.305 return
2025-07-01 05:50:14.315 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:14.324 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:14.331 else:
2025-07-01 05:50:14.338 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:14.349 eqi = None
2025-07-01 05:50:14.358
2025-07-01 05:50:14.366 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:14.377 # identical
2025-07-01 05:50:14.387
2025-07-01 05:50:14.398 # pump out diffs from before the synch point
2025-07-01 05:50:14.410 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:14.421
2025-07-01 05:50:14.434 # do intraline marking on the synch pair
2025-07-01 05:50:14.443 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:14.451 if eqi is None:
2025-07-01 05:50:14.458 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:14.467 atags = btags = ""
2025-07-01 05:50:14.479 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:14.488 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:14.497 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:14.509 if tag == 'replace':
2025-07-01 05:50:14.519 atags += '^' * la
2025-07-01 05:50:14.530 btags += '^' * lb
2025-07-01 05:50:14.541 elif tag == 'delete':
2025-07-01 05:50:14.553 atags += '-' * la
2025-07-01 05:50:14.562 elif tag == 'insert':
2025-07-01 05:50:14.570 btags += '+' * lb
2025-07-01 05:50:14.580 elif tag == 'equal':
2025-07-01 05:50:14.590 atags += ' ' * la
2025-07-01 05:50:14.599 btags += ' ' * lb
2025-07-01 05:50:14.606 else:
2025-07-01 05:50:14.617 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:14.626 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:14.633 else:
2025-07-01 05:50:14.639 # the synch pair is identical
2025-07-01 05:50:14.644 yield ' ' + aelt
2025-07-01 05:50:14.651
2025-07-01 05:50:14.661 # pump out diffs from after the synch point
2025-07-01 05:50:14.669 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:14.676
2025-07-01 05:50:14.683 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:14.693 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:14.702
2025-07-01 05:50:14.711 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:14.721 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:14.728 alo = 425, ahi = 1101
2025-07-01 05:50:14.734 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:14.740 blo = 425, bhi = 1101
2025-07-01 05:50:14.748
2025-07-01 05:50:14.758 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:14.766 g = []
2025-07-01 05:50:14.772 if alo < ahi:
2025-07-01 05:50:14.779 if blo < bhi:
2025-07-01 05:50:14.786 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:14.792 else:
2025-07-01 05:50:14.797 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:14.803 elif blo < bhi:
2025-07-01 05:50:14.808 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:14.814
2025-07-01 05:50:14.819 > yield from g
2025-07-01 05:50:14.825
2025-07-01 05:50:14.830 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:14.837 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:14.844
2025-07-01 05:50:14.851 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:14.857 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:14.861 alo = 425, ahi = 1101
2025-07-01 05:50:14.867 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:14.872 blo = 425, bhi = 1101
2025-07-01 05:50:14.879
2025-07-01 05:50:14.885 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:14.890 r"""
2025-07-01 05:50:14.896 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:14.901 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:14.908 synch point, and intraline difference marking is done on the
2025-07-01 05:50:14.914 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:14.920
2025-07-01 05:50:14.926 Example:
2025-07-01 05:50:14.936
2025-07-01 05:50:14.945 >>> d = Differ()
2025-07-01 05:50:14.958 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:14.969 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:14.980 >>> print(''.join(results), end="")
2025-07-01 05:50:14.990 - abcDefghiJkl
2025-07-01 05:50:15.004 + abcdefGhijkl
2025-07-01 05:50:15.017 """
2025-07-01 05:50:15.023
2025-07-01 05:50:15.031 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:15.038 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:15.049 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:15.058 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:15.066 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:15.072
2025-07-01 05:50:15.079 # search for the pair that matches best without being identical
2025-07-01 05:50:15.085 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:15.091 # on junk -- unless we have to)
2025-07-01 05:50:15.098 for j in range(blo, bhi):
2025-07-01 05:50:15.106 bj = b[j]
2025-07-01 05:50:15.117 cruncher.set_seq2(bj)
2025-07-01 05:50:15.125 for i in range(alo, ahi):
2025-07-01 05:50:15.132 ai = a[i]
2025-07-01 05:50:15.138 if ai == bj:
2025-07-01 05:50:15.144 if eqi is None:
2025-07-01 05:50:15.150 eqi, eqj = i, j
2025-07-01 05:50:15.156 continue
2025-07-01 05:50:15.162 cruncher.set_seq1(ai)
2025-07-01 05:50:15.171 # computing similarity is expensive, so use the quick
2025-07-01 05:50:15.183 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:15.191 # compares by a factor of 3.
2025-07-01 05:50:15.199 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:15.212 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:15.221 # of the computation is cached by cruncher
2025-07-01 05:50:15.228 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:15.235 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:15.241 cruncher.ratio() > best_ratio:
2025-07-01 05:50:15.248 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:15.254 if best_ratio < cutoff:
2025-07-01 05:50:15.259 # no non-identical "pretty close" pair
2025-07-01 05:50:15.265 if eqi is None:
2025-07-01 05:50:15.271 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:15.277 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:15.282 return
2025-07-01 05:50:15.288 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:15.295 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:15.301 else:
2025-07-01 05:50:15.307 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:15.313 eqi = None
2025-07-01 05:50:15.318
2025-07-01 05:50:15.324 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:15.332 # identical
2025-07-01 05:50:15.343
2025-07-01 05:50:15.352 # pump out diffs from before the synch point
2025-07-01 05:50:15.359 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:15.365
2025-07-01 05:50:15.370 # do intraline marking on the synch pair
2025-07-01 05:50:15.380 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:15.390 if eqi is None:
2025-07-01 05:50:15.397 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:15.403 atags = btags = ""
2025-07-01 05:50:15.409 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:15.415 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:15.421 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:15.433 if tag == 'replace':
2025-07-01 05:50:15.441 atags += '^' * la
2025-07-01 05:50:15.448 btags += '^' * lb
2025-07-01 05:50:15.455 elif tag == 'delete':
2025-07-01 05:50:15.460 atags += '-' * la
2025-07-01 05:50:15.466 elif tag == 'insert':
2025-07-01 05:50:15.471 btags += '+' * lb
2025-07-01 05:50:15.477 elif tag == 'equal':
2025-07-01 05:50:15.482 atags += ' ' * la
2025-07-01 05:50:15.488 btags += ' ' * lb
2025-07-01 05:50:15.493 else:
2025-07-01 05:50:15.499 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:15.505 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:15.512 else:
2025-07-01 05:50:15.519 # the synch pair is identical
2025-07-01 05:50:15.526 yield ' ' + aelt
2025-07-01 05:50:15.534
2025-07-01 05:50:15.540 # pump out diffs from after the synch point
2025-07-01 05:50:15.547 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:15.555
2025-07-01 05:50:15.567 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:15.578 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:15.587
2025-07-01 05:50:15.593 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:15.600 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:15.607 alo = 426, ahi = 1101
2025-07-01 05:50:15.620 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:15.628 blo = 426, bhi = 1101
2025-07-01 05:50:15.636
2025-07-01 05:50:15.648 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:15.657 g = []
2025-07-01 05:50:15.668 if alo < ahi:
2025-07-01 05:50:15.674 if blo < bhi:
2025-07-01 05:50:15.684 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:15.695 else:
2025-07-01 05:50:15.704 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:15.712 elif blo < bhi:
2025-07-01 05:50:15.726 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:15.735
2025-07-01 05:50:15.742 > yield from g
2025-07-01 05:50:15.748
2025-07-01 05:50:15.753 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:15.758 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:15.762
2025-07-01 05:50:15.767 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:15.772 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:15.776 alo = 426, ahi = 1101
2025-07-01 05:50:15.782 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:15.787 blo = 426, bhi = 1101
2025-07-01 05:50:15.792
2025-07-01 05:50:15.799 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:15.809 r"""
2025-07-01 05:50:15.818 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:15.828 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:15.837 synch point, and intraline difference marking is done on the
2025-07-01 05:50:15.850 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:15.861
2025-07-01 05:50:15.869 Example:
2025-07-01 05:50:15.875
2025-07-01 05:50:15.881 >>> d = Differ()
2025-07-01 05:50:15.887 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:15.892 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:15.905 >>> print(''.join(results), end="")
2025-07-01 05:50:15.916 - abcDefghiJkl
2025-07-01 05:50:15.941 + abcdefGhijkl
2025-07-01 05:50:15.963 """
2025-07-01 05:50:15.974
2025-07-01 05:50:15.987 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:15.996 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:16.003 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:16.010 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:16.017 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:16.022
2025-07-01 05:50:16.028 # search for the pair that matches best without being identical
2025-07-01 05:50:16.035 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:16.041 # on junk -- unless we have to)
2025-07-01 05:50:16.046 for j in range(blo, bhi):
2025-07-01 05:50:16.052 bj = b[j]
2025-07-01 05:50:16.060 cruncher.set_seq2(bj)
2025-07-01 05:50:16.075 for i in range(alo, ahi):
2025-07-01 05:50:16.086 ai = a[i]
2025-07-01 05:50:16.093 if ai == bj:
2025-07-01 05:50:16.100 if eqi is None:
2025-07-01 05:50:16.105 eqi, eqj = i, j
2025-07-01 05:50:16.111 continue
2025-07-01 05:50:16.116 cruncher.set_seq1(ai)
2025-07-01 05:50:16.121 # computing similarity is expensive, so use the quick
2025-07-01 05:50:16.129 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:16.136 # compares by a factor of 3.
2025-07-01 05:50:16.144 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:16.151 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:16.159 # of the computation is cached by cruncher
2025-07-01 05:50:16.170 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:16.180 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:16.188 cruncher.ratio() > best_ratio:
2025-07-01 05:50:16.195 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:16.203 if best_ratio < cutoff:
2025-07-01 05:50:16.215 # no non-identical "pretty close" pair
2025-07-01 05:50:16.225 if eqi is None:
2025-07-01 05:50:16.237 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:16.249 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:16.260 return
2025-07-01 05:50:16.269 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:16.281 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:16.291 else:
2025-07-01 05:50:16.300 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:16.307 eqi = None
2025-07-01 05:50:16.315
2025-07-01 05:50:16.327 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:16.336 # identical
2025-07-01 05:50:16.343
2025-07-01 05:50:16.350 # pump out diffs from before the synch point
2025-07-01 05:50:16.362 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:16.373
2025-07-01 05:50:16.382 # do intraline marking on the synch pair
2025-07-01 05:50:16.389 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:16.395 if eqi is None:
2025-07-01 05:50:16.401 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:16.407 atags = btags = ""
2025-07-01 05:50:16.413 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:16.419 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:16.425 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:16.431 if tag == 'replace':
2025-07-01 05:50:16.437 atags += '^' * la
2025-07-01 05:50:16.444 btags += '^' * lb
2025-07-01 05:50:16.451 elif tag == 'delete':
2025-07-01 05:50:16.459 atags += '-' * la
2025-07-01 05:50:16.468 elif tag == 'insert':
2025-07-01 05:50:16.474 btags += '+' * lb
2025-07-01 05:50:16.480 elif tag == 'equal':
2025-07-01 05:50:16.486 atags += ' ' * la
2025-07-01 05:50:16.492 btags += ' ' * lb
2025-07-01 05:50:16.497 else:
2025-07-01 05:50:16.503 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:16.509 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:16.514 else:
2025-07-01 05:50:16.520 # the synch pair is identical
2025-07-01 05:50:16.526 yield ' ' + aelt
2025-07-01 05:50:16.535
2025-07-01 05:50:16.545 # pump out diffs from after the synch point
2025-07-01 05:50:16.554 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:16.562
2025-07-01 05:50:16.571 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:16.578 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:16.584
2025-07-01 05:50:16.590 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:16.602 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:16.611 alo = 427, ahi = 1101
2025-07-01 05:50:16.620 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:16.627 blo = 427, bhi = 1101
2025-07-01 05:50:16.632
2025-07-01 05:50:16.637 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:16.642 g = []
2025-07-01 05:50:16.647 if alo < ahi:
2025-07-01 05:50:16.653 if blo < bhi:
2025-07-01 05:50:16.659 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:16.666 else:
2025-07-01 05:50:16.675 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:16.686 elif blo < bhi:
2025-07-01 05:50:16.694 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:16.703
2025-07-01 05:50:16.713 > yield from g
2025-07-01 05:50:16.725
2025-07-01 05:50:16.733 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:16.741 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:16.747
2025-07-01 05:50:16.753 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:16.760 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:16.765 alo = 427, ahi = 1101
2025-07-01 05:50:16.771 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:16.776 blo = 427, bhi = 1101
2025-07-01 05:50:16.780
2025-07-01 05:50:16.785 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:16.789 r"""
2025-07-01 05:50:16.794 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:16.798 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:16.803 synch point, and intraline difference marking is done on the
2025-07-01 05:50:16.811 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:16.822
2025-07-01 05:50:16.830 Example:
2025-07-01 05:50:16.836
2025-07-01 05:50:16.843 >>> d = Differ()
2025-07-01 05:50:16.852 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:16.865 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:16.876 >>> print(''.join(results), end="")
2025-07-01 05:50:16.884 - abcDefghiJkl
2025-07-01 05:50:16.897 + abcdefGhijkl
2025-07-01 05:50:16.914 """
2025-07-01 05:50:16.923
2025-07-01 05:50:16.935 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:16.944 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:16.951 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:16.958 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:16.965 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:16.970
2025-07-01 05:50:16.981 # search for the pair that matches best without being identical
2025-07-01 05:50:16.991 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:16.999 # on junk -- unless we have to)
2025-07-01 05:50:17.006 for j in range(blo, bhi):
2025-07-01 05:50:17.013 bj = b[j]
2025-07-01 05:50:17.019 cruncher.set_seq2(bj)
2025-07-01 05:50:17.025 for i in range(alo, ahi):
2025-07-01 05:50:17.031 ai = a[i]
2025-07-01 05:50:17.036 if ai == bj:
2025-07-01 05:50:17.043 if eqi is None:
2025-07-01 05:50:17.050 eqi, eqj = i, j
2025-07-01 05:50:17.058 continue
2025-07-01 05:50:17.064 cruncher.set_seq1(ai)
2025-07-01 05:50:17.070 # computing similarity is expensive, so use the quick
2025-07-01 05:50:17.076 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:17.082 # compares by a factor of 3.
2025-07-01 05:50:17.092 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:17.100 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:17.106 # of the computation is cached by cruncher
2025-07-01 05:50:17.112 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:17.117 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:17.123 cruncher.ratio() > best_ratio:
2025-07-01 05:50:17.129 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:17.134 if best_ratio < cutoff:
2025-07-01 05:50:17.140 # no non-identical "pretty close" pair
2025-07-01 05:50:17.146 if eqi is None:
2025-07-01 05:50:17.152 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:17.166 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:17.178 return
2025-07-01 05:50:17.188 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:17.199 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:17.205 else:
2025-07-01 05:50:17.212 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:17.219 eqi = None
2025-07-01 05:50:17.224
2025-07-01 05:50:17.230 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:17.234 # identical
2025-07-01 05:50:17.239
2025-07-01 05:50:17.244 # pump out diffs from before the synch point
2025-07-01 05:50:17.249 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:17.254
2025-07-01 05:50:17.258 # do intraline marking on the synch pair
2025-07-01 05:50:17.263 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:17.268 if eqi is None:
2025-07-01 05:50:17.273 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:17.278 atags = btags = ""
2025-07-01 05:50:17.283 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:17.288 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:17.294 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:17.301 if tag == 'replace':
2025-07-01 05:50:17.307 atags += '^' * la
2025-07-01 05:50:17.313 btags += '^' * lb
2025-07-01 05:50:17.319 elif tag == 'delete':
2025-07-01 05:50:17.325 atags += '-' * la
2025-07-01 05:50:17.331 elif tag == 'insert':
2025-07-01 05:50:17.337 btags += '+' * lb
2025-07-01 05:50:17.343 elif tag == 'equal':
2025-07-01 05:50:17.349 atags += ' ' * la
2025-07-01 05:50:17.355 btags += ' ' * lb
2025-07-01 05:50:17.360 else:
2025-07-01 05:50:17.366 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:17.372 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:17.377 else:
2025-07-01 05:50:17.383 # the synch pair is identical
2025-07-01 05:50:17.390 yield ' ' + aelt
2025-07-01 05:50:17.396
2025-07-01 05:50:17.410 # pump out diffs from after the synch point
2025-07-01 05:50:17.419 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:17.425
2025-07-01 05:50:17.431 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:17.437 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:17.443
2025-07-01 05:50:17.450 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:17.459 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:17.471 alo = 428, ahi = 1101
2025-07-01 05:50:17.479 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:17.486 blo = 428, bhi = 1101
2025-07-01 05:50:17.493
2025-07-01 05:50:17.499 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:17.504 g = []
2025-07-01 05:50:17.515 if alo < ahi:
2025-07-01 05:50:17.525 if blo < bhi:
2025-07-01 05:50:17.533 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:17.540 else:
2025-07-01 05:50:17.547 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:17.552 elif blo < bhi:
2025-07-01 05:50:17.558 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:17.563
2025-07-01 05:50:17.569 > yield from g
2025-07-01 05:50:17.575
2025-07-01 05:50:17.581 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:17.588 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:17.595
2025-07-01 05:50:17.604 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:17.616 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:17.624 alo = 428, ahi = 1101
2025-07-01 05:50:17.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:17.637 blo = 428, bhi = 1101
2025-07-01 05:50:17.642
2025-07-01 05:50:17.648 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:17.653 r"""
2025-07-01 05:50:17.659 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:17.665 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:17.671 synch point, and intraline difference marking is done on the
2025-07-01 05:50:17.676 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:17.680
2025-07-01 05:50:17.685 Example:
2025-07-01 05:50:17.690
2025-07-01 05:50:17.694 >>> d = Differ()
2025-07-01 05:50:17.699 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:17.704 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:17.709 >>> print(''.join(results), end="")
2025-07-01 05:50:17.713 - abcDefghiJkl
2025-07-01 05:50:17.722 + abcdefGhijkl
2025-07-01 05:50:17.732 """
2025-07-01 05:50:17.738
2025-07-01 05:50:17.743 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:17.750 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:17.758 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:17.765 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:17.771 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:17.777
2025-07-01 05:50:17.782 # search for the pair that matches best without being identical
2025-07-01 05:50:17.788 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:17.794 # on junk -- unless we have to)
2025-07-01 05:50:17.802 for j in range(blo, bhi):
2025-07-01 05:50:17.809 bj = b[j]
2025-07-01 05:50:17.815 cruncher.set_seq2(bj)
2025-07-01 05:50:17.821 for i in range(alo, ahi):
2025-07-01 05:50:17.827 ai = a[i]
2025-07-01 05:50:17.832 if ai == bj:
2025-07-01 05:50:17.838 if eqi is None:
2025-07-01 05:50:17.844 eqi, eqj = i, j
2025-07-01 05:50:17.851 continue
2025-07-01 05:50:17.858 cruncher.set_seq1(ai)
2025-07-01 05:50:17.870 # computing similarity is expensive, so use the quick
2025-07-01 05:50:17.881 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:17.890 # compares by a factor of 3.
2025-07-01 05:50:17.897 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:17.903 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:17.910 # of the computation is cached by cruncher
2025-07-01 05:50:17.919 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:17.929 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:17.937 cruncher.ratio() > best_ratio:
2025-07-01 05:50:17.946 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:17.953 if best_ratio < cutoff:
2025-07-01 05:50:17.959 # no non-identical "pretty close" pair
2025-07-01 05:50:17.965 if eqi is None:
2025-07-01 05:50:17.973 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:17.979 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:17.986 return
2025-07-01 05:50:17.995 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:18.007 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:18.018 else:
2025-07-01 05:50:18.026 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:18.032 eqi = None
2025-07-01 05:50:18.039
2025-07-01 05:50:18.050 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:18.057 # identical
2025-07-01 05:50:18.064
2025-07-01 05:50:18.071 # pump out diffs from before the synch point
2025-07-01 05:50:18.077 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:18.083
2025-07-01 05:50:18.089 # do intraline marking on the synch pair
2025-07-01 05:50:18.094 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:18.104 if eqi is None:
2025-07-01 05:50:18.113 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:18.121 atags = btags = ""
2025-07-01 05:50:18.131 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:18.141 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:18.153 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:18.163 if tag == 'replace':
2025-07-01 05:50:18.171 atags += '^' * la
2025-07-01 05:50:18.179 btags += '^' * lb
2025-07-01 05:50:18.190 elif tag == 'delete':
2025-07-01 05:50:18.197 atags += '-' * la
2025-07-01 05:50:18.203 elif tag == 'insert':
2025-07-01 05:50:18.211 btags += '+' * lb
2025-07-01 05:50:18.223 elif tag == 'equal':
2025-07-01 05:50:18.231 atags += ' ' * la
2025-07-01 05:50:18.238 btags += ' ' * lb
2025-07-01 05:50:18.243 else:
2025-07-01 05:50:18.249 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:18.255 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:18.263 else:
2025-07-01 05:50:18.271 # the synch pair is identical
2025-07-01 05:50:18.279 yield ' ' + aelt
2025-07-01 05:50:18.286
2025-07-01 05:50:18.293 # pump out diffs from after the synch point
2025-07-01 05:50:18.299 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:18.305
2025-07-01 05:50:18.312 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:18.319 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:18.330
2025-07-01 05:50:18.340 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:18.348 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:18.355 alo = 429, ahi = 1101
2025-07-01 05:50:18.363 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:18.372 blo = 429, bhi = 1101
2025-07-01 05:50:18.383
2025-07-01 05:50:18.391 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:18.397 g = []
2025-07-01 05:50:18.403 if alo < ahi:
2025-07-01 05:50:18.408 if blo < bhi:
2025-07-01 05:50:18.413 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:18.420 else:
2025-07-01 05:50:18.426 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:18.432 elif blo < bhi:
2025-07-01 05:50:18.439 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:18.445
2025-07-01 05:50:18.451 > yield from g
2025-07-01 05:50:18.461
2025-07-01 05:50:18.472 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:18.481 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:18.488
2025-07-01 05:50:18.494 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:18.499 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:18.504 alo = 429, ahi = 1101
2025-07-01 05:50:18.513 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:18.519 blo = 429, bhi = 1101
2025-07-01 05:50:18.526
2025-07-01 05:50:18.537 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:18.547 r"""
2025-07-01 05:50:18.558 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:18.566 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:18.578 synch point, and intraline difference marking is done on the
2025-07-01 05:50:18.589 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:18.597
2025-07-01 05:50:18.603 Example:
2025-07-01 05:50:18.611
2025-07-01 05:50:18.618 >>> d = Differ()
2025-07-01 05:50:18.625 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:18.632 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:18.639 >>> print(''.join(results), end="")
2025-07-01 05:50:18.646 - abcDefghiJkl
2025-07-01 05:50:18.669 + abcdefGhijkl
2025-07-01 05:50:18.681 """
2025-07-01 05:50:18.687
2025-07-01 05:50:18.693 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:18.699 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:18.706 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:18.713 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:18.719 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:18.724
2025-07-01 05:50:18.730 # search for the pair that matches best without being identical
2025-07-01 05:50:18.739 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:18.750 # on junk -- unless we have to)
2025-07-01 05:50:18.757 for j in range(blo, bhi):
2025-07-01 05:50:18.762 bj = b[j]
2025-07-01 05:50:18.768 cruncher.set_seq2(bj)
2025-07-01 05:50:18.780 for i in range(alo, ahi):
2025-07-01 05:50:18.791 ai = a[i]
2025-07-01 05:50:18.799 if ai == bj:
2025-07-01 05:50:18.806 if eqi is None:
2025-07-01 05:50:18.812 eqi, eqj = i, j
2025-07-01 05:50:18.818 continue
2025-07-01 05:50:18.824 cruncher.set_seq1(ai)
2025-07-01 05:50:18.830 # computing similarity is expensive, so use the quick
2025-07-01 05:50:18.840 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:18.850 # compares by a factor of 3.
2025-07-01 05:50:18.858 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:18.864 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:18.869 # of the computation is cached by cruncher
2025-07-01 05:50:18.875 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:18.880 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:18.886 cruncher.ratio() > best_ratio:
2025-07-01 05:50:18.893 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:18.899 if best_ratio < cutoff:
2025-07-01 05:50:18.905 # no non-identical "pretty close" pair
2025-07-01 05:50:18.911 if eqi is None:
2025-07-01 05:50:18.917 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:18.924 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:18.931 return
2025-07-01 05:50:18.938 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:18.946 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:18.960 else:
2025-07-01 05:50:18.969 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:18.976 eqi = None
2025-07-01 05:50:18.982
2025-07-01 05:50:18.988 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:18.995 # identical
2025-07-01 05:50:18.999
2025-07-01 05:50:19.004 # pump out diffs from before the synch point
2025-07-01 05:50:19.010 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:19.016
2025-07-01 05:50:19.023 # do intraline marking on the synch pair
2025-07-01 05:50:19.030 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:19.040 if eqi is None:
2025-07-01 05:50:19.052 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:19.061 atags = btags = ""
2025-07-01 05:50:19.070 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:19.078 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:19.086 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:19.092 if tag == 'replace':
2025-07-01 05:50:19.098 atags += '^' * la
2025-07-01 05:50:19.104 btags += '^' * lb
2025-07-01 05:50:19.111 elif tag == 'delete':
2025-07-01 05:50:19.117 atags += '-' * la
2025-07-01 05:50:19.124 elif tag == 'insert':
2025-07-01 05:50:19.131 btags += '+' * lb
2025-07-01 05:50:19.138 elif tag == 'equal':
2025-07-01 05:50:19.144 atags += ' ' * la
2025-07-01 05:50:19.151 btags += ' ' * lb
2025-07-01 05:50:19.157 else:
2025-07-01 05:50:19.163 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:19.175 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:19.185 else:
2025-07-01 05:50:19.195 # the synch pair is identical
2025-07-01 05:50:19.203 yield ' ' + aelt
2025-07-01 05:50:19.211
2025-07-01 05:50:19.218 # pump out diffs from after the synch point
2025-07-01 05:50:19.226 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:19.236
2025-07-01 05:50:19.244 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:19.251 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:19.257
2025-07-01 05:50:19.263 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:19.270 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:19.279 alo = 430, ahi = 1101
2025-07-01 05:50:19.291 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:19.300 blo = 430, bhi = 1101
2025-07-01 05:50:19.307
2025-07-01 05:50:19.313 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:19.320 g = []
2025-07-01 05:50:19.329 if alo < ahi:
2025-07-01 05:50:19.339 if blo < bhi:
2025-07-01 05:50:19.347 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:19.354 else:
2025-07-01 05:50:19.363 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:19.370 elif blo < bhi:
2025-07-01 05:50:19.377 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:19.383
2025-07-01 05:50:19.390 > yield from g
2025-07-01 05:50:19.401
2025-07-01 05:50:19.408 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:19.415 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:19.421
2025-07-01 05:50:19.432 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:19.446 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:19.456 alo = 430, ahi = 1101
2025-07-01 05:50:19.464 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:19.477 blo = 430, bhi = 1101
2025-07-01 05:50:19.487
2025-07-01 05:50:19.499 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:19.508 r"""
2025-07-01 05:50:19.517 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:19.532 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:19.539 synch point, and intraline difference marking is done on the
2025-07-01 05:50:19.546 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:19.552
2025-07-01 05:50:19.558 Example:
2025-07-01 05:50:19.562
2025-07-01 05:50:19.569 >>> d = Differ()
2025-07-01 05:50:19.574 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:19.579 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:19.584 >>> print(''.join(results), end="")
2025-07-01 05:50:19.594 - abcDefghiJkl
2025-07-01 05:50:19.609 + abcdefGhijkl
2025-07-01 05:50:19.619 """
2025-07-01 05:50:19.624
2025-07-01 05:50:19.630 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:19.636 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:19.647 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:19.657 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:19.667 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:19.678
2025-07-01 05:50:19.689 # search for the pair that matches best without being identical
2025-07-01 05:50:19.701 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:19.711 # on junk -- unless we have to)
2025-07-01 05:50:19.722 for j in range(blo, bhi):
2025-07-01 05:50:19.731 bj = b[j]
2025-07-01 05:50:19.740 cruncher.set_seq2(bj)
2025-07-01 05:50:19.751 for i in range(alo, ahi):
2025-07-01 05:50:19.761 ai = a[i]
2025-07-01 05:50:19.768 if ai == bj:
2025-07-01 05:50:19.775 if eqi is None:
2025-07-01 05:50:19.783 eqi, eqj = i, j
2025-07-01 05:50:19.793 continue
2025-07-01 05:50:19.801 cruncher.set_seq1(ai)
2025-07-01 05:50:19.808 # computing similarity is expensive, so use the quick
2025-07-01 05:50:19.814 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:19.824 # compares by a factor of 3.
2025-07-01 05:50:19.834 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:19.842 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:19.848 # of the computation is cached by cruncher
2025-07-01 05:50:19.854 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:19.860 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:19.866 cruncher.ratio() > best_ratio:
2025-07-01 05:50:19.876 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:19.885 if best_ratio < cutoff:
2025-07-01 05:50:19.891 # no non-identical "pretty close" pair
2025-07-01 05:50:19.897 if eqi is None:
2025-07-01 05:50:19.903 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:19.910 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:19.917 return
2025-07-01 05:50:19.929 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:19.940 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:19.951 else:
2025-07-01 05:50:19.962 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:19.973 eqi = None
2025-07-01 05:50:19.979
2025-07-01 05:50:19.984 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:19.992 # identical
2025-07-01 05:50:19.999
2025-07-01 05:50:20.004 # pump out diffs from before the synch point
2025-07-01 05:50:20.012 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:20.019
2025-07-01 05:50:20.026 # do intraline marking on the synch pair
2025-07-01 05:50:20.036 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:20.049 if eqi is None:
2025-07-01 05:50:20.057 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:20.064 atags = btags = ""
2025-07-01 05:50:20.069 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:20.074 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:20.083 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:20.090 if tag == 'replace':
2025-07-01 05:50:20.099 atags += '^' * la
2025-07-01 05:50:20.112 btags += '^' * lb
2025-07-01 05:50:20.120 elif tag == 'delete':
2025-07-01 05:50:20.127 atags += '-' * la
2025-07-01 05:50:20.132 elif tag == 'insert':
2025-07-01 05:50:20.138 btags += '+' * lb
2025-07-01 05:50:20.144 elif tag == 'equal':
2025-07-01 05:50:20.150 atags += ' ' * la
2025-07-01 05:50:20.158 btags += ' ' * lb
2025-07-01 05:50:20.169 else:
2025-07-01 05:50:20.179 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:20.186 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:20.192 else:
2025-07-01 05:50:20.199 # the synch pair is identical
2025-07-01 05:50:20.205 yield ' ' + aelt
2025-07-01 05:50:20.212
2025-07-01 05:50:20.219 # pump out diffs from after the synch point
2025-07-01 05:50:20.226 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:20.233
2025-07-01 05:50:20.240 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:20.247 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:20.254
2025-07-01 05:50:20.265 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:20.275 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:20.282 alo = 431, ahi = 1101
2025-07-01 05:50:20.289 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:20.295 blo = 431, bhi = 1101
2025-07-01 05:50:20.301
2025-07-01 05:50:20.306 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:20.315 g = []
2025-07-01 05:50:20.322 if alo < ahi:
2025-07-01 05:50:20.329 if blo < bhi:
2025-07-01 05:50:20.336 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:20.342 else:
2025-07-01 05:50:20.349 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:20.354 elif blo < bhi:
2025-07-01 05:50:20.368 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:20.376
2025-07-01 05:50:20.388 > yield from g
2025-07-01 05:50:20.401
2025-07-01 05:50:20.414 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:20.427 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:20.439
2025-07-01 05:50:20.449 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:20.461 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:20.470 alo = 431, ahi = 1101
2025-07-01 05:50:20.479 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:20.487 blo = 431, bhi = 1101
2025-07-01 05:50:20.498
2025-07-01 05:50:20.507 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:20.514 r"""
2025-07-01 05:50:20.522 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:20.531 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:20.543 synch point, and intraline difference marking is done on the
2025-07-01 05:50:20.554 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:20.566
2025-07-01 05:50:20.575 Example:
2025-07-01 05:50:20.585
2025-07-01 05:50:20.595 >>> d = Differ()
2025-07-01 05:50:20.603 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:20.617 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:20.629 >>> print(''.join(results), end="")
2025-07-01 05:50:20.637 - abcDefghiJkl
2025-07-01 05:50:20.649 + abcdefGhijkl
2025-07-01 05:50:20.666 """
2025-07-01 05:50:20.672
2025-07-01 05:50:20.678 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:20.685 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:20.696 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:20.705 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:20.712 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:20.718
2025-07-01 05:50:20.723 # search for the pair that matches best without being identical
2025-07-01 05:50:20.728 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:20.733 # on junk -- unless we have to)
2025-07-01 05:50:20.738 for j in range(blo, bhi):
2025-07-01 05:50:20.743 bj = b[j]
2025-07-01 05:50:20.750 cruncher.set_seq2(bj)
2025-07-01 05:50:20.761 for i in range(alo, ahi):
2025-07-01 05:50:20.768 ai = a[i]
2025-07-01 05:50:20.776 if ai == bj:
2025-07-01 05:50:20.783 if eqi is None:
2025-07-01 05:50:20.789 eqi, eqj = i, j
2025-07-01 05:50:20.795 continue
2025-07-01 05:50:20.800 cruncher.set_seq1(ai)
2025-07-01 05:50:20.807 # computing similarity is expensive, so use the quick
2025-07-01 05:50:20.815 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:20.822 # compares by a factor of 3.
2025-07-01 05:50:20.833 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:20.844 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:20.851 # of the computation is cached by cruncher
2025-07-01 05:50:20.857 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:20.864 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:20.872 cruncher.ratio() > best_ratio:
2025-07-01 05:50:20.879 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:20.886 if best_ratio < cutoff:
2025-07-01 05:50:20.892 # no non-identical "pretty close" pair
2025-07-01 05:50:20.898 if eqi is None:
2025-07-01 05:50:20.905 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:20.912 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:20.918 return
2025-07-01 05:50:20.923 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:20.928 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:20.933 else:
2025-07-01 05:50:20.937 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:20.942 eqi = None
2025-07-01 05:50:20.946
2025-07-01 05:50:20.951 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:20.956 # identical
2025-07-01 05:50:20.962
2025-07-01 05:50:20.969 # pump out diffs from before the synch point
2025-07-01 05:50:20.976 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:20.983
2025-07-01 05:50:20.995 # do intraline marking on the synch pair
2025-07-01 05:50:21.006 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:21.015 if eqi is None:
2025-07-01 05:50:21.021 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:21.027 atags = btags = ""
2025-07-01 05:50:21.033 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:21.039 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:21.045 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:21.050 if tag == 'replace':
2025-07-01 05:50:21.061 atags += '^' * la
2025-07-01 05:50:21.071 btags += '^' * lb
2025-07-01 05:50:21.084 elif tag == 'delete':
2025-07-01 05:50:21.095 atags += '-' * la
2025-07-01 05:50:21.103 elif tag == 'insert':
2025-07-01 05:50:21.111 btags += '+' * lb
2025-07-01 05:50:21.119 elif tag == 'equal':
2025-07-01 05:50:21.126 atags += ' ' * la
2025-07-01 05:50:21.131 btags += ' ' * lb
2025-07-01 05:50:21.136 else:
2025-07-01 05:50:21.140 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:21.146 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:21.153 else:
2025-07-01 05:50:21.159 # the synch pair is identical
2025-07-01 05:50:21.169 yield ' ' + aelt
2025-07-01 05:50:21.175
2025-07-01 05:50:21.180 # pump out diffs from after the synch point
2025-07-01 05:50:21.189 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:21.195
2025-07-01 05:50:21.201 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:21.212 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:21.224
2025-07-01 05:50:21.232 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:21.241 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:21.252 alo = 432, ahi = 1101
2025-07-01 05:50:21.264 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:21.274 blo = 432, bhi = 1101
2025-07-01 05:50:21.284
2025-07-01 05:50:21.294 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:21.304 g = []
2025-07-01 05:50:21.314 if alo < ahi:
2025-07-01 05:50:21.325 if blo < bhi:
2025-07-01 05:50:21.335 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:21.343 else:
2025-07-01 05:50:21.351 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:21.359 elif blo < bhi:
2025-07-01 05:50:21.366 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:21.376
2025-07-01 05:50:21.385 > yield from g
2025-07-01 05:50:21.392
2025-07-01 05:50:21.399 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:21.405 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:21.410
2025-07-01 05:50:21.420 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:21.430 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:21.437 alo = 432, ahi = 1101
2025-07-01 05:50:21.446 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:21.457 blo = 432, bhi = 1101
2025-07-01 05:50:21.465
2025-07-01 05:50:21.473 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:21.480 r"""
2025-07-01 05:50:21.487 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:21.494 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:21.505 synch point, and intraline difference marking is done on the
2025-07-01 05:50:21.517 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:21.526
2025-07-01 05:50:21.533 Example:
2025-07-01 05:50:21.539
2025-07-01 05:50:21.547 >>> d = Differ()
2025-07-01 05:50:21.557 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:21.568 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:21.574 >>> print(''.join(results), end="")
2025-07-01 05:50:21.580 - abcDefghiJkl
2025-07-01 05:50:21.595 + abcdefGhijkl
2025-07-01 05:50:21.616 """
2025-07-01 05:50:21.622
2025-07-01 05:50:21.634 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:21.644 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:21.652 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:21.658 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:21.664 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:21.669
2025-07-01 05:50:21.676 # search for the pair that matches best without being identical
2025-07-01 05:50:21.683 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:21.690 # on junk -- unless we have to)
2025-07-01 05:50:21.697 for j in range(blo, bhi):
2025-07-01 05:50:21.703 bj = b[j]
2025-07-01 05:50:21.712 cruncher.set_seq2(bj)
2025-07-01 05:50:21.723 for i in range(alo, ahi):
2025-07-01 05:50:21.731 ai = a[i]
2025-07-01 05:50:21.738 if ai == bj:
2025-07-01 05:50:21.744 if eqi is None:
2025-07-01 05:50:21.754 eqi, eqj = i, j
2025-07-01 05:50:21.764 continue
2025-07-01 05:50:21.771 cruncher.set_seq1(ai)
2025-07-01 05:50:21.777 # computing similarity is expensive, so use the quick
2025-07-01 05:50:21.782 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:21.790 # compares by a factor of 3.
2025-07-01 05:50:21.800 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:21.807 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:21.815 # of the computation is cached by cruncher
2025-07-01 05:50:21.822 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:21.828 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:21.835 cruncher.ratio() > best_ratio:
2025-07-01 05:50:21.849 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:21.859 if best_ratio < cutoff:
2025-07-01 05:50:21.867 # no non-identical "pretty close" pair
2025-07-01 05:50:21.874 if eqi is None:
2025-07-01 05:50:21.880 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:21.886 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:21.892 return
2025-07-01 05:50:21.898 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:21.904 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:21.910 else:
2025-07-01 05:50:21.920 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:21.931 eqi = None
2025-07-01 05:50:21.939
2025-07-01 05:50:21.946 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:21.952 # identical
2025-07-01 05:50:21.958
2025-07-01 05:50:21.970 # pump out diffs from before the synch point
2025-07-01 05:50:21.983 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:21.991
2025-07-01 05:50:21.997 # do intraline marking on the synch pair
2025-07-01 05:50:22.002 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:22.008 if eqi is None:
2025-07-01 05:50:22.021 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:22.032 atags = btags = ""
2025-07-01 05:50:22.040 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:22.050 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:22.061 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:22.071 if tag == 'replace':
2025-07-01 05:50:22.081 atags += '^' * la
2025-07-01 05:50:22.092 btags += '^' * lb
2025-07-01 05:50:22.100 elif tag == 'delete':
2025-07-01 05:50:22.107 atags += '-' * la
2025-07-01 05:50:22.115 elif tag == 'insert':
2025-07-01 05:50:22.127 btags += '+' * lb
2025-07-01 05:50:22.138 elif tag == 'equal':
2025-07-01 05:50:22.150 atags += ' ' * la
2025-07-01 05:50:22.159 btags += ' ' * lb
2025-07-01 05:50:22.165 else:
2025-07-01 05:50:22.171 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:22.177 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:22.181 else:
2025-07-01 05:50:22.185 # the synch pair is identical
2025-07-01 05:50:22.190 yield ' ' + aelt
2025-07-01 05:50:22.194
2025-07-01 05:50:22.198 # pump out diffs from after the synch point
2025-07-01 05:50:22.203 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:22.207
2025-07-01 05:50:22.211 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:22.216 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:22.220
2025-07-01 05:50:22.224 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:22.237 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:22.244 alo = 433, ahi = 1101
2025-07-01 05:50:22.253 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:22.262 blo = 433, bhi = 1101
2025-07-01 05:50:22.271
2025-07-01 05:50:22.278 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:22.283 g = []
2025-07-01 05:50:22.288 if alo < ahi:
2025-07-01 05:50:22.292 if blo < bhi:
2025-07-01 05:50:22.296 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:22.301 else:
2025-07-01 05:50:22.305 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:22.311 elif blo < bhi:
2025-07-01 05:50:22.318 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:22.324
2025-07-01 05:50:22.330 > yield from g
2025-07-01 05:50:22.336
2025-07-01 05:50:22.343 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:22.351 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:22.362
2025-07-01 05:50:22.368 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:22.376 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:22.388 alo = 433, ahi = 1101
2025-07-01 05:50:22.401 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:22.409 blo = 433, bhi = 1101
2025-07-01 05:50:22.419
2025-07-01 05:50:22.426 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:22.432 r"""
2025-07-01 05:50:22.439 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:22.446 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:22.453 synch point, and intraline difference marking is done on the
2025-07-01 05:50:22.460 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:22.466
2025-07-01 05:50:22.476 Example:
2025-07-01 05:50:22.486
2025-07-01 05:50:22.494 >>> d = Differ()
2025-07-01 05:50:22.501 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:22.506 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:22.511 >>> print(''.join(results), end="")
2025-07-01 05:50:22.516 - abcDefghiJkl
2025-07-01 05:50:22.526 + abcdefGhijkl
2025-07-01 05:50:22.546 """
2025-07-01 05:50:22.553
2025-07-01 05:50:22.562 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:22.572 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:22.581 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:22.589 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:22.596 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:22.601
2025-07-01 05:50:22.607 # search for the pair that matches best without being identical
2025-07-01 05:50:22.615 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:22.625 # on junk -- unless we have to)
2025-07-01 05:50:22.634 for j in range(blo, bhi):
2025-07-01 05:50:22.640 bj = b[j]
2025-07-01 05:50:22.646 cruncher.set_seq2(bj)
2025-07-01 05:50:22.656 for i in range(alo, ahi):
2025-07-01 05:50:22.665 ai = a[i]
2025-07-01 05:50:22.674 if ai == bj:
2025-07-01 05:50:22.686 if eqi is None:
2025-07-01 05:50:22.695 eqi, eqj = i, j
2025-07-01 05:50:22.704 continue
2025-07-01 05:50:22.712 cruncher.set_seq1(ai)
2025-07-01 05:50:22.720 # computing similarity is expensive, so use the quick
2025-07-01 05:50:22.727 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:22.734 # compares by a factor of 3.
2025-07-01 05:50:22.746 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:22.754 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:22.761 # of the computation is cached by cruncher
2025-07-01 05:50:22.767 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:22.775 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:22.786 cruncher.ratio() > best_ratio:
2025-07-01 05:50:22.796 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:22.803 if best_ratio < cutoff:
2025-07-01 05:50:22.809 # no non-identical "pretty close" pair
2025-07-01 05:50:22.815 if eqi is None:
2025-07-01 05:50:22.821 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:22.832 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:22.843 return
2025-07-01 05:50:22.855 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:22.864 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:22.871 else:
2025-07-01 05:50:22.878 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:22.883 eqi = None
2025-07-01 05:50:22.888
2025-07-01 05:50:22.894 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:22.899 # identical
2025-07-01 05:50:22.904
2025-07-01 05:50:22.911 # pump out diffs from before the synch point
2025-07-01 05:50:22.919 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:22.929
2025-07-01 05:50:22.937 # do intraline marking on the synch pair
2025-07-01 05:50:22.944 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:22.950 if eqi is None:
2025-07-01 05:50:22.955 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:22.959 atags = btags = ""
2025-07-01 05:50:22.964 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:22.969 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:22.974 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:22.979 if tag == 'replace':
2025-07-01 05:50:22.984 atags += '^' * la
2025-07-01 05:50:22.990 btags += '^' * lb
2025-07-01 05:50:22.996 elif tag == 'delete':
2025-07-01 05:50:23.002 atags += '-' * la
2025-07-01 05:50:23.010 elif tag == 'insert':
2025-07-01 05:50:23.018 btags += '+' * lb
2025-07-01 05:50:23.024 elif tag == 'equal':
2025-07-01 05:50:23.031 atags += ' ' * la
2025-07-01 05:50:23.039 btags += ' ' * lb
2025-07-01 05:50:23.050 else:
2025-07-01 05:50:23.058 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:23.064 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:23.070 else:
2025-07-01 05:50:23.075 # the synch pair is identical
2025-07-01 05:50:23.081 yield ' ' + aelt
2025-07-01 05:50:23.088
2025-07-01 05:50:23.095 # pump out diffs from after the synch point
2025-07-01 05:50:23.103 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:23.113
2025-07-01 05:50:23.121 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:23.128 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:23.133
2025-07-01 05:50:23.138 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:23.143 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:23.148 alo = 434, ahi = 1101
2025-07-01 05:50:23.153 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:23.159 blo = 434, bhi = 1101
2025-07-01 05:50:23.167
2025-07-01 05:50:23.174 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:23.182 g = []
2025-07-01 05:50:23.193 if alo < ahi:
2025-07-01 05:50:23.202 if blo < bhi:
2025-07-01 05:50:23.214 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:23.225 else:
2025-07-01 05:50:23.239 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:23.251 elif blo < bhi:
2025-07-01 05:50:23.261 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:23.268
2025-07-01 05:50:23.275 > yield from g
2025-07-01 05:50:23.283
2025-07-01 05:50:23.294 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:23.303 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:23.312
2025-07-01 05:50:23.320 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:23.332 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:23.342 alo = 434, ahi = 1101
2025-07-01 05:50:23.350 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:23.361 blo = 434, bhi = 1101
2025-07-01 05:50:23.370
2025-07-01 05:50:23.379 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:23.389 r"""
2025-07-01 05:50:23.398 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:23.406 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:23.415 synch point, and intraline difference marking is done on the
2025-07-01 05:50:23.426 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:23.438
2025-07-01 05:50:23.447 Example:
2025-07-01 05:50:23.456
2025-07-01 05:50:23.464 >>> d = Differ()
2025-07-01 05:50:23.471 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:23.485 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:23.494 >>> print(''.join(results), end="")
2025-07-01 05:50:23.504 - abcDefghiJkl
2025-07-01 05:50:23.523 + abcdefGhijkl
2025-07-01 05:50:23.538 """
2025-07-01 05:50:23.545
2025-07-01 05:50:23.550 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:23.555 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:23.561 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:23.567 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:23.573 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:23.579
2025-07-01 05:50:23.584 # search for the pair that matches best without being identical
2025-07-01 05:50:23.590 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:23.600 # on junk -- unless we have to)
2025-07-01 05:50:23.611 for j in range(blo, bhi):
2025-07-01 05:50:23.618 bj = b[j]
2025-07-01 05:50:23.624 cruncher.set_seq2(bj)
2025-07-01 05:50:23.630 for i in range(alo, ahi):
2025-07-01 05:50:23.634 ai = a[i]
2025-07-01 05:50:23.639 if ai == bj:
2025-07-01 05:50:23.643 if eqi is None:
2025-07-01 05:50:23.648 eqi, eqj = i, j
2025-07-01 05:50:23.653 continue
2025-07-01 05:50:23.657 cruncher.set_seq1(ai)
2025-07-01 05:50:23.663 # computing similarity is expensive, so use the quick
2025-07-01 05:50:23.670 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:23.681 # compares by a factor of 3.
2025-07-01 05:50:23.690 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:23.697 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:23.705 # of the computation is cached by cruncher
2025-07-01 05:50:23.718 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:23.731 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:23.740 cruncher.ratio() > best_ratio:
2025-07-01 05:50:23.748 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:23.755 if best_ratio < cutoff:
2025-07-01 05:50:23.762 # no non-identical "pretty close" pair
2025-07-01 05:50:23.773 if eqi is None:
2025-07-01 05:50:23.782 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:23.790 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:23.798 return
2025-07-01 05:50:23.807 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:23.817 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:23.825 else:
2025-07-01 05:50:23.837 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:23.846 eqi = None
2025-07-01 05:50:23.853
2025-07-01 05:50:23.860 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:23.866 # identical
2025-07-01 05:50:23.876
2025-07-01 05:50:23.885 # pump out diffs from before the synch point
2025-07-01 05:50:23.893 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:23.899
2025-07-01 05:50:23.904 # do intraline marking on the synch pair
2025-07-01 05:50:23.909 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:23.914 if eqi is None:
2025-07-01 05:50:23.920 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:23.925 atags = btags = ""
2025-07-01 05:50:23.937 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:23.947 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:23.955 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:23.962 if tag == 'replace':
2025-07-01 05:50:23.972 atags += '^' * la
2025-07-01 05:50:23.984 btags += '^' * lb
2025-07-01 05:50:23.993 elif tag == 'delete':
2025-07-01 05:50:24.001 atags += '-' * la
2025-07-01 05:50:24.007 elif tag == 'insert':
2025-07-01 05:50:24.013 btags += '+' * lb
2025-07-01 05:50:24.018 elif tag == 'equal':
2025-07-01 05:50:24.024 atags += ' ' * la
2025-07-01 05:50:24.030 btags += ' ' * lb
2025-07-01 05:50:24.034 else:
2025-07-01 05:50:24.039 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:24.043 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:24.048 else:
2025-07-01 05:50:24.053 # the synch pair is identical
2025-07-01 05:50:24.057 yield ' ' + aelt
2025-07-01 05:50:24.062
2025-07-01 05:50:24.069 # pump out diffs from after the synch point
2025-07-01 05:50:24.076 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:24.082
2025-07-01 05:50:24.087 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:24.092 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:24.097
2025-07-01 05:50:24.101 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:24.107 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:24.111 alo = 435, ahi = 1101
2025-07-01 05:50:24.118 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:24.124 blo = 435, bhi = 1101
2025-07-01 05:50:24.130
2025-07-01 05:50:24.137 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:24.144 g = []
2025-07-01 05:50:24.155 if alo < ahi:
2025-07-01 05:50:24.168 if blo < bhi:
2025-07-01 05:50:24.181 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:24.189 else:
2025-07-01 05:50:24.196 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:24.202 elif blo < bhi:
2025-07-01 05:50:24.207 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:24.214
2025-07-01 05:50:24.219 > yield from g
2025-07-01 05:50:24.224
2025-07-01 05:50:24.230 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:24.237 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:24.242
2025-07-01 05:50:24.248 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:24.255 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:24.260 alo = 435, ahi = 1101
2025-07-01 05:50:24.267 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:24.275 blo = 435, bhi = 1101
2025-07-01 05:50:24.286
2025-07-01 05:50:24.295 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:24.302 r"""
2025-07-01 05:50:24.314 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:24.322 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:24.330 synch point, and intraline difference marking is done on the
2025-07-01 05:50:24.339 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:24.349
2025-07-01 05:50:24.359 Example:
2025-07-01 05:50:24.367
2025-07-01 05:50:24.378 >>> d = Differ()
2025-07-01 05:50:24.385 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:24.392 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:24.399 >>> print(''.join(results), end="")
2025-07-01 05:50:24.406 - abcDefghiJkl
2025-07-01 05:50:24.419 + abcdefGhijkl
2025-07-01 05:50:24.431 """
2025-07-01 05:50:24.438
2025-07-01 05:50:24.448 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:24.459 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:24.465 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:24.471 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:24.478 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:24.484
2025-07-01 05:50:24.491 # search for the pair that matches best without being identical
2025-07-01 05:50:24.501 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:24.508 # on junk -- unless we have to)
2025-07-01 05:50:24.515 for j in range(blo, bhi):
2025-07-01 05:50:24.521 bj = b[j]
2025-07-01 05:50:24.527 cruncher.set_seq2(bj)
2025-07-01 05:50:24.533 for i in range(alo, ahi):
2025-07-01 05:50:24.538 ai = a[i]
2025-07-01 05:50:24.544 if ai == bj:
2025-07-01 05:50:24.549 if eqi is None:
2025-07-01 05:50:24.555 eqi, eqj = i, j
2025-07-01 05:50:24.560 continue
2025-07-01 05:50:24.566 cruncher.set_seq1(ai)
2025-07-01 05:50:24.572 # computing similarity is expensive, so use the quick
2025-07-01 05:50:24.578 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:24.583 # compares by a factor of 3.
2025-07-01 05:50:24.589 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:24.595 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:24.602 # of the computation is cached by cruncher
2025-07-01 05:50:24.608 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:24.614 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:24.624 cruncher.ratio() > best_ratio:
2025-07-01 05:50:24.633 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:24.644 if best_ratio < cutoff:
2025-07-01 05:50:24.654 # no non-identical "pretty close" pair
2025-07-01 05:50:24.663 if eqi is None:
2025-07-01 05:50:24.672 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:24.681 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:24.697 return
2025-07-01 05:50:24.705 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:24.713 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:24.719 else:
2025-07-01 05:50:24.725 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:24.731 eqi = None
2025-07-01 05:50:24.737
2025-07-01 05:50:24.743 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:24.750 # identical
2025-07-01 05:50:24.759
2025-07-01 05:50:24.766 # pump out diffs from before the synch point
2025-07-01 05:50:24.773 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:24.778
2025-07-01 05:50:24.785 # do intraline marking on the synch pair
2025-07-01 05:50:24.792 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:24.797 if eqi is None:
2025-07-01 05:50:24.802 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:24.808 atags = btags = ""
2025-07-01 05:50:24.814 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:24.819 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:24.825 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:24.831 if tag == 'replace':
2025-07-01 05:50:24.837 atags += '^' * la
2025-07-01 05:50:24.844 btags += '^' * lb
2025-07-01 05:50:24.850 elif tag == 'delete':
2025-07-01 05:50:24.857 atags += '-' * la
2025-07-01 05:50:24.863 elif tag == 'insert':
2025-07-01 05:50:24.868 btags += '+' * lb
2025-07-01 05:50:24.874 elif tag == 'equal':
2025-07-01 05:50:24.880 atags += ' ' * la
2025-07-01 05:50:24.885 btags += ' ' * lb
2025-07-01 05:50:24.891 else:
2025-07-01 05:50:24.899 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:24.909 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:24.915 else:
2025-07-01 05:50:24.921 # the synch pair is identical
2025-07-01 05:50:24.926 yield ' ' + aelt
2025-07-01 05:50:24.932
2025-07-01 05:50:24.938 # pump out diffs from after the synch point
2025-07-01 05:50:24.944 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:24.951
2025-07-01 05:50:24.959 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:24.970 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:24.978
2025-07-01 05:50:24.984 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:24.990 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:24.996 alo = 436, ahi = 1101
2025-07-01 05:50:25.002 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:25.008 blo = 436, bhi = 1101
2025-07-01 05:50:25.014
2025-07-01 05:50:25.021 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:25.028 g = []
2025-07-01 05:50:25.035 if alo < ahi:
2025-07-01 05:50:25.041 if blo < bhi:
2025-07-01 05:50:25.052 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:25.061 else:
2025-07-01 05:50:25.068 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:25.074 elif blo < bhi:
2025-07-01 05:50:25.079 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:25.084
2025-07-01 05:50:25.089 > yield from g
2025-07-01 05:50:25.093
2025-07-01 05:50:25.098 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:25.103 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:25.108
2025-07-01 05:50:25.112 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:25.117 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:25.122 alo = 436, ahi = 1101
2025-07-01 05:50:25.127 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:25.132 blo = 436, bhi = 1101
2025-07-01 05:50:25.138
2025-07-01 05:50:25.144 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:25.149 r"""
2025-07-01 05:50:25.154 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:25.160 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:25.165 synch point, and intraline difference marking is done on the
2025-07-01 05:50:25.171 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:25.176
2025-07-01 05:50:25.182 Example:
2025-07-01 05:50:25.187
2025-07-01 05:50:25.195 >>> d = Differ()
2025-07-01 05:50:25.208 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:25.217 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:25.223 >>> print(''.join(results), end="")
2025-07-01 05:50:25.234 - abcDefghiJkl
2025-07-01 05:50:25.254 + abcdefGhijkl
2025-07-01 05:50:25.277 """
2025-07-01 05:50:25.287
2025-07-01 05:50:25.298 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:25.309 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:25.317 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:25.324 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:25.331 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:25.339
2025-07-01 05:50:25.351 # search for the pair that matches best without being identical
2025-07-01 05:50:25.362 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:25.371 # on junk -- unless we have to)
2025-07-01 05:50:25.384 for j in range(blo, bhi):
2025-07-01 05:50:25.397 bj = b[j]
2025-07-01 05:50:25.407 cruncher.set_seq2(bj)
2025-07-01 05:50:25.416 for i in range(alo, ahi):
2025-07-01 05:50:25.424 ai = a[i]
2025-07-01 05:50:25.430 if ai == bj:
2025-07-01 05:50:25.437 if eqi is None:
2025-07-01 05:50:25.443 eqi, eqj = i, j
2025-07-01 05:50:25.450 continue
2025-07-01 05:50:25.457 cruncher.set_seq1(ai)
2025-07-01 05:50:25.471 # computing similarity is expensive, so use the quick
2025-07-01 05:50:25.480 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:25.488 # compares by a factor of 3.
2025-07-01 05:50:25.501 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:25.511 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:25.526 # of the computation is cached by cruncher
2025-07-01 05:50:25.538 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:25.547 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:25.556 cruncher.ratio() > best_ratio:
2025-07-01 05:50:25.569 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:25.579 if best_ratio < cutoff:
2025-07-01 05:50:25.588 # no non-identical "pretty close" pair
2025-07-01 05:50:25.596 if eqi is None:
2025-07-01 05:50:25.604 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:25.611 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:25.618 return
2025-07-01 05:50:25.628 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:25.638 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:25.647 else:
2025-07-01 05:50:25.655 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:25.663 eqi = None
2025-07-01 05:50:25.670
2025-07-01 05:50:25.682 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:25.691 # identical
2025-07-01 05:50:25.702
2025-07-01 05:50:25.713 # pump out diffs from before the synch point
2025-07-01 05:50:25.726 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:25.738
2025-07-01 05:50:25.747 # do intraline marking on the synch pair
2025-07-01 05:50:25.756 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:25.769 if eqi is None:
2025-07-01 05:50:25.778 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:25.787 atags = btags = ""
2025-07-01 05:50:25.798 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:25.807 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:25.817 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:25.828 if tag == 'replace':
2025-07-01 05:50:25.838 atags += '^' * la
2025-07-01 05:50:25.849 btags += '^' * lb
2025-07-01 05:50:25.857 elif tag == 'delete':
2025-07-01 05:50:25.865 atags += '-' * la
2025-07-01 05:50:25.873 elif tag == 'insert':
2025-07-01 05:50:25.880 btags += '+' * lb
2025-07-01 05:50:25.886 elif tag == 'equal':
2025-07-01 05:50:25.892 atags += ' ' * la
2025-07-01 05:50:25.899 btags += ' ' * lb
2025-07-01 05:50:25.905 else:
2025-07-01 05:50:25.912 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:25.919 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:25.925 else:
2025-07-01 05:50:25.930 # the synch pair is identical
2025-07-01 05:50:25.937 yield ' ' + aelt
2025-07-01 05:50:25.943
2025-07-01 05:50:25.949 # pump out diffs from after the synch point
2025-07-01 05:50:25.954 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:25.963
2025-07-01 05:50:25.970 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:25.978 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:25.985
2025-07-01 05:50:25.992 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:25.999 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:26.009 alo = 437, ahi = 1101
2025-07-01 05:50:26.020 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:26.029 blo = 437, bhi = 1101
2025-07-01 05:50:26.036
2025-07-01 05:50:26.046 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:26.056 g = []
2025-07-01 05:50:26.064 if alo < ahi:
2025-07-01 05:50:26.071 if blo < bhi:
2025-07-01 05:50:26.079 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:26.087 else:
2025-07-01 05:50:26.094 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:26.103 elif blo < bhi:
2025-07-01 05:50:26.113 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:26.122
2025-07-01 05:50:26.129 > yield from g
2025-07-01 05:50:26.140
2025-07-01 05:50:26.150 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:26.160 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:26.167
2025-07-01 05:50:26.174 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:26.182 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:26.188 alo = 437, ahi = 1101
2025-07-01 05:50:26.195 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:26.200 blo = 437, bhi = 1101
2025-07-01 05:50:26.206
2025-07-01 05:50:26.216 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:26.226 r"""
2025-07-01 05:50:26.233 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:26.240 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:26.246 synch point, and intraline difference marking is done on the
2025-07-01 05:50:26.258 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:26.270
2025-07-01 05:50:26.279 Example:
2025-07-01 05:50:26.290
2025-07-01 05:50:26.299 >>> d = Differ()
2025-07-01 05:50:26.307 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:26.314 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:26.322 >>> print(''.join(results), end="")
2025-07-01 05:50:26.334 - abcDefghiJkl
2025-07-01 05:50:26.358 + abcdefGhijkl
2025-07-01 05:50:26.382 """
2025-07-01 05:50:26.392
2025-07-01 05:50:26.400 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:26.406 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:26.411 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:26.416 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:26.420 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:26.426
2025-07-01 05:50:26.431 # search for the pair that matches best without being identical
2025-07-01 05:50:26.439 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:26.450 # on junk -- unless we have to)
2025-07-01 05:50:26.457 for j in range(blo, bhi):
2025-07-01 05:50:26.462 bj = b[j]
2025-07-01 05:50:26.474 cruncher.set_seq2(bj)
2025-07-01 05:50:26.483 for i in range(alo, ahi):
2025-07-01 05:50:26.491 ai = a[i]
2025-07-01 05:50:26.498 if ai == bj:
2025-07-01 05:50:26.503 if eqi is None:
2025-07-01 05:50:26.511 eqi, eqj = i, j
2025-07-01 05:50:26.521 continue
2025-07-01 05:50:26.528 cruncher.set_seq1(ai)
2025-07-01 05:50:26.534 # computing similarity is expensive, so use the quick
2025-07-01 05:50:26.539 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:26.544 # compares by a factor of 3.
2025-07-01 05:50:26.549 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:26.554 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:26.561 # of the computation is cached by cruncher
2025-07-01 05:50:26.567 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:26.575 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:26.584 cruncher.ratio() > best_ratio:
2025-07-01 05:50:26.591 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:26.603 if best_ratio < cutoff:
2025-07-01 05:50:26.613 # no non-identical "pretty close" pair
2025-07-01 05:50:26.619 if eqi is None:
2025-07-01 05:50:26.625 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:26.630 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:26.637 return
2025-07-01 05:50:26.643 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:26.650 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:26.656 else:
2025-07-01 05:50:26.662 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:26.667 eqi = None
2025-07-01 05:50:26.673
2025-07-01 05:50:26.680 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:26.685 # identical
2025-07-01 05:50:26.691
2025-07-01 05:50:26.697 # pump out diffs from before the synch point
2025-07-01 05:50:26.703 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:26.712
2025-07-01 05:50:26.721 # do intraline marking on the synch pair
2025-07-01 05:50:26.729 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:26.736 if eqi is None:
2025-07-01 05:50:26.742 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:26.747 atags = btags = ""
2025-07-01 05:50:26.752 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:26.756 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:26.761 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:26.770 if tag == 'replace':
2025-07-01 05:50:26.779 atags += '^' * la
2025-07-01 05:50:26.785 btags += '^' * lb
2025-07-01 05:50:26.791 elif tag == 'delete':
2025-07-01 05:50:26.801 atags += '-' * la
2025-07-01 05:50:26.810 elif tag == 'insert':
2025-07-01 05:50:26.816 btags += '+' * lb
2025-07-01 05:50:26.822 elif tag == 'equal':
2025-07-01 05:50:26.827 atags += ' ' * la
2025-07-01 05:50:26.836 btags += ' ' * lb
2025-07-01 05:50:26.847 else:
2025-07-01 05:50:26.854 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:26.860 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:26.866 else:
2025-07-01 05:50:26.871 # the synch pair is identical
2025-07-01 05:50:26.881 yield ' ' + aelt
2025-07-01 05:50:26.890
2025-07-01 05:50:26.897 # pump out diffs from after the synch point
2025-07-01 05:50:26.905 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:26.917
2025-07-01 05:50:26.926 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:26.937 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:26.944
2025-07-01 05:50:26.950 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:26.958 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:26.963 alo = 438, ahi = 1101
2025-07-01 05:50:26.971 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:26.976 blo = 438, bhi = 1101
2025-07-01 05:50:26.983
2025-07-01 05:50:26.993 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:27.002 g = []
2025-07-01 05:50:27.012 if alo < ahi:
2025-07-01 05:50:27.022 if blo < bhi:
2025-07-01 05:50:27.031 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:27.039 else:
2025-07-01 05:50:27.050 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:27.058 elif blo < bhi:
2025-07-01 05:50:27.067 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:27.079
2025-07-01 05:50:27.086 > yield from g
2025-07-01 05:50:27.093
2025-07-01 05:50:27.100 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:27.106 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:27.111
2025-07-01 05:50:27.117 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:27.123 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:27.131 alo = 438, ahi = 1101
2025-07-01 05:50:27.141 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:27.148 blo = 438, bhi = 1101
2025-07-01 05:50:27.158
2025-07-01 05:50:27.166 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:27.177 r"""
2025-07-01 05:50:27.186 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:27.194 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:27.200 synch point, and intraline difference marking is done on the
2025-07-01 05:50:27.207 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:27.220
2025-07-01 05:50:27.229 Example:
2025-07-01 05:50:27.236
2025-07-01 05:50:27.244 >>> d = Differ()
2025-07-01 05:50:27.251 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:27.259 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:27.270 >>> print(''.join(results), end="")
2025-07-01 05:50:27.281 - abcDefghiJkl
2025-07-01 05:50:27.299 + abcdefGhijkl
2025-07-01 05:50:27.312 """
2025-07-01 05:50:27.324
2025-07-01 05:50:27.333 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:27.340 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:27.346 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:27.351 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:27.357 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:27.361
2025-07-01 05:50:27.367 # search for the pair that matches best without being identical
2025-07-01 05:50:27.372 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:27.377 # on junk -- unless we have to)
2025-07-01 05:50:27.382 for j in range(blo, bhi):
2025-07-01 05:50:27.387 bj = b[j]
2025-07-01 05:50:27.391 cruncher.set_seq2(bj)
2025-07-01 05:50:27.398 for i in range(alo, ahi):
2025-07-01 05:50:27.403 ai = a[i]
2025-07-01 05:50:27.407 if ai == bj:
2025-07-01 05:50:27.412 if eqi is None:
2025-07-01 05:50:27.416 eqi, eqj = i, j
2025-07-01 05:50:27.421 continue
2025-07-01 05:50:27.426 cruncher.set_seq1(ai)
2025-07-01 05:50:27.432 # computing similarity is expensive, so use the quick
2025-07-01 05:50:27.437 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:27.443 # compares by a factor of 3.
2025-07-01 05:50:27.455 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:27.466 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:27.478 # of the computation is cached by cruncher
2025-07-01 05:50:27.489 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:27.501 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:27.510 cruncher.ratio() > best_ratio:
2025-07-01 05:50:27.518 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:27.528 if best_ratio < cutoff:
2025-07-01 05:50:27.542 # no non-identical "pretty close" pair
2025-07-01 05:50:27.556 if eqi is None:
2025-07-01 05:50:27.565 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:27.572 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:27.578 return
2025-07-01 05:50:27.585 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:27.591 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:27.597 else:
2025-07-01 05:50:27.604 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:27.610 eqi = None
2025-07-01 05:50:27.616
2025-07-01 05:50:27.623 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:27.630 # identical
2025-07-01 05:50:27.637
2025-07-01 05:50:27.644 # pump out diffs from before the synch point
2025-07-01 05:50:27.651 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:27.657
2025-07-01 05:50:27.663 # do intraline marking on the synch pair
2025-07-01 05:50:27.669 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:27.675 if eqi is None:
2025-07-01 05:50:27.681 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:27.687 atags = btags = ""
2025-07-01 05:50:27.693 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:27.699 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:27.705 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:27.713 if tag == 'replace':
2025-07-01 05:50:27.722 atags += '^' * la
2025-07-01 05:50:27.729 btags += '^' * lb
2025-07-01 05:50:27.735 elif tag == 'delete':
2025-07-01 05:50:27.741 atags += '-' * la
2025-07-01 05:50:27.747 elif tag == 'insert':
2025-07-01 05:50:27.753 btags += '+' * lb
2025-07-01 05:50:27.759 elif tag == 'equal':
2025-07-01 05:50:27.765 atags += ' ' * la
2025-07-01 05:50:27.771 btags += ' ' * lb
2025-07-01 05:50:27.777 else:
2025-07-01 05:50:27.783 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:27.789 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:27.794 else:
2025-07-01 05:50:27.800 # the synch pair is identical
2025-07-01 05:50:27.805 yield ' ' + aelt
2025-07-01 05:50:27.810
2025-07-01 05:50:27.816 # pump out diffs from after the synch point
2025-07-01 05:50:27.821 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:27.827
2025-07-01 05:50:27.833 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:27.840 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:27.847
2025-07-01 05:50:27.854 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:27.862 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:27.867 alo = 439, ahi = 1101
2025-07-01 05:50:27.874 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:27.879 blo = 439, bhi = 1101
2025-07-01 05:50:27.886
2025-07-01 05:50:27.895 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:27.907 g = []
2025-07-01 05:50:27.918 if alo < ahi:
2025-07-01 05:50:27.928 if blo < bhi:
2025-07-01 05:50:27.936 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:27.943 else:
2025-07-01 05:50:27.949 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:27.955 elif blo < bhi:
2025-07-01 05:50:27.962 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:27.972
2025-07-01 05:50:27.980 > yield from g
2025-07-01 05:50:27.986
2025-07-01 05:50:27.992 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:27.997 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:28.003
2025-07-01 05:50:28.009 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:28.017 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:28.022 alo = 439, ahi = 1101
2025-07-01 05:50:28.029 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:28.034 blo = 439, bhi = 1101
2025-07-01 05:50:28.040
2025-07-01 05:50:28.045 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:28.051 r"""
2025-07-01 05:50:28.058 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:28.069 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:28.078 synch point, and intraline difference marking is done on the
2025-07-01 05:50:28.085 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:28.092
2025-07-01 05:50:28.098 Example:
2025-07-01 05:50:28.109
2025-07-01 05:50:28.118 >>> d = Differ()
2025-07-01 05:50:28.125 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:28.132 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:28.138 >>> print(''.join(results), end="")
2025-07-01 05:50:28.149 - abcDefghiJkl
2025-07-01 05:50:28.169 + abcdefGhijkl
2025-07-01 05:50:28.189 """
2025-07-01 05:50:28.196
2025-07-01 05:50:28.202 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:28.208 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:28.215 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:28.222 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:28.228 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:28.232
2025-07-01 05:50:28.238 # search for the pair that matches best without being identical
2025-07-01 05:50:28.243 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:28.248 # on junk -- unless we have to)
2025-07-01 05:50:28.253 for j in range(blo, bhi):
2025-07-01 05:50:28.258 bj = b[j]
2025-07-01 05:50:28.266 cruncher.set_seq2(bj)
2025-07-01 05:50:28.276 for i in range(alo, ahi):
2025-07-01 05:50:28.284 ai = a[i]
2025-07-01 05:50:28.296 if ai == bj:
2025-07-01 05:50:28.308 if eqi is None:
2025-07-01 05:50:28.316 eqi, eqj = i, j
2025-07-01 05:50:28.324 continue
2025-07-01 05:50:28.329 cruncher.set_seq1(ai)
2025-07-01 05:50:28.337 # computing similarity is expensive, so use the quick
2025-07-01 05:50:28.343 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:28.348 # compares by a factor of 3.
2025-07-01 05:50:28.354 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:28.363 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:28.375 # of the computation is cached by cruncher
2025-07-01 05:50:28.383 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:28.391 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:28.400 cruncher.ratio() > best_ratio:
2025-07-01 05:50:28.406 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:28.411 if best_ratio < cutoff:
2025-07-01 05:50:28.416 # no non-identical "pretty close" pair
2025-07-01 05:50:28.421 if eqi is None:
2025-07-01 05:50:28.426 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:28.431 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:28.436 return
2025-07-01 05:50:28.441 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:28.445 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:28.450 else:
2025-07-01 05:50:28.462 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:28.472 eqi = None
2025-07-01 05:50:28.479
2025-07-01 05:50:28.485 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:28.490 # identical
2025-07-01 05:50:28.495
2025-07-01 05:50:28.500 # pump out diffs from before the synch point
2025-07-01 05:50:28.506 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:28.512
2025-07-01 05:50:28.518 # do intraline marking on the synch pair
2025-07-01 05:50:28.528 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:28.538 if eqi is None:
2025-07-01 05:50:28.547 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:28.560 atags = btags = ""
2025-07-01 05:50:28.569 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:28.576 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:28.582 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:28.588 if tag == 'replace':
2025-07-01 05:50:28.595 atags += '^' * la
2025-07-01 05:50:28.601 btags += '^' * lb
2025-07-01 05:50:28.608 elif tag == 'delete':
2025-07-01 05:50:28.615 atags += '-' * la
2025-07-01 05:50:28.628 elif tag == 'insert':
2025-07-01 05:50:28.638 btags += '+' * lb
2025-07-01 05:50:28.650 elif tag == 'equal':
2025-07-01 05:50:28.657 atags += ' ' * la
2025-07-01 05:50:28.663 btags += ' ' * lb
2025-07-01 05:50:28.669 else:
2025-07-01 05:50:28.674 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:28.679 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:28.684 else:
2025-07-01 05:50:28.690 # the synch pair is identical
2025-07-01 05:50:28.696 yield ' ' + aelt
2025-07-01 05:50:28.703
2025-07-01 05:50:28.709 # pump out diffs from after the synch point
2025-07-01 05:50:28.716 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:28.722
2025-07-01 05:50:28.728 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:28.741 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:28.750
2025-07-01 05:50:28.760 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:28.768 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:28.774 alo = 440, ahi = 1101
2025-07-01 05:50:28.781 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:28.787 blo = 440, bhi = 1101
2025-07-01 05:50:28.795
2025-07-01 05:50:28.806 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:28.812 g = []
2025-07-01 05:50:28.818 if alo < ahi:
2025-07-01 05:50:28.827 if blo < bhi:
2025-07-01 05:50:28.835 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:28.842 else:
2025-07-01 05:50:28.848 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:28.857 elif blo < bhi:
2025-07-01 05:50:28.867 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:28.873
2025-07-01 05:50:28.880 > yield from g
2025-07-01 05:50:28.886
2025-07-01 05:50:28.893 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:28.900 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:28.907
2025-07-01 05:50:28.914 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:28.923 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:28.931 alo = 440, ahi = 1101
2025-07-01 05:50:28.940 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:28.949 blo = 440, bhi = 1101
2025-07-01 05:50:28.954
2025-07-01 05:50:28.960 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:28.965 r"""
2025-07-01 05:50:28.970 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:28.975 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:28.980 synch point, and intraline difference marking is done on the
2025-07-01 05:50:28.985 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:28.990
2025-07-01 05:50:28.995 Example:
2025-07-01 05:50:29.000
2025-07-01 05:50:29.005 >>> d = Differ()
2025-07-01 05:50:29.010 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:29.015 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:29.020 >>> print(''.join(results), end="")
2025-07-01 05:50:29.025 - abcDefghiJkl
2025-07-01 05:50:29.034 + abcdefGhijkl
2025-07-01 05:50:29.044 """
2025-07-01 05:50:29.049
2025-07-01 05:50:29.054 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:29.060 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:29.065 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:29.071 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:29.077 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:29.083
2025-07-01 05:50:29.090 # search for the pair that matches best without being identical
2025-07-01 05:50:29.099 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:29.109 # on junk -- unless we have to)
2025-07-01 05:50:29.117 for j in range(blo, bhi):
2025-07-01 05:50:29.123 bj = b[j]
2025-07-01 05:50:29.129 cruncher.set_seq2(bj)
2025-07-01 05:50:29.134 for i in range(alo, ahi):
2025-07-01 05:50:29.138 ai = a[i]
2025-07-01 05:50:29.143 if ai == bj:
2025-07-01 05:50:29.148 if eqi is None:
2025-07-01 05:50:29.153 eqi, eqj = i, j
2025-07-01 05:50:29.158 continue
2025-07-01 05:50:29.163 cruncher.set_seq1(ai)
2025-07-01 05:50:29.168 # computing similarity is expensive, so use the quick
2025-07-01 05:50:29.174 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:29.181 # compares by a factor of 3.
2025-07-01 05:50:29.188 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:29.195 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:29.202 # of the computation is cached by cruncher
2025-07-01 05:50:29.214 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:29.222 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:29.229 cruncher.ratio() > best_ratio:
2025-07-01 05:50:29.235 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:29.240 if best_ratio < cutoff:
2025-07-01 05:50:29.246 # no non-identical "pretty close" pair
2025-07-01 05:50:29.252 if eqi is None:
2025-07-01 05:50:29.258 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:29.265 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:29.272 return
2025-07-01 05:50:29.278 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:29.283 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:29.290 else:
2025-07-01 05:50:29.299 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:29.309 eqi = None
2025-07-01 05:50:29.317
2025-07-01 05:50:29.323 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:29.329 # identical
2025-07-01 05:50:29.333
2025-07-01 05:50:29.338 # pump out diffs from before the synch point
2025-07-01 05:50:29.343 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:29.348
2025-07-01 05:50:29.354 # do intraline marking on the synch pair
2025-07-01 05:50:29.360 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:29.366 if eqi is None:
2025-07-01 05:50:29.374 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:29.381 atags = btags = ""
2025-07-01 05:50:29.387 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:29.393 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:29.399 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:29.404 if tag == 'replace':
2025-07-01 05:50:29.410 atags += '^' * la
2025-07-01 05:50:29.415 btags += '^' * lb
2025-07-01 05:50:29.421 elif tag == 'delete':
2025-07-01 05:50:29.426 atags += '-' * la
2025-07-01 05:50:29.432 elif tag == 'insert':
2025-07-01 05:50:29.438 btags += '+' * lb
2025-07-01 05:50:29.443 elif tag == 'equal':
2025-07-01 05:50:29.449 atags += ' ' * la
2025-07-01 05:50:29.454 btags += ' ' * lb
2025-07-01 05:50:29.459 else:
2025-07-01 05:50:29.463 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:29.468 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:29.473 else:
2025-07-01 05:50:29.483 # the synch pair is identical
2025-07-01 05:50:29.490 yield ' ' + aelt
2025-07-01 05:50:29.496
2025-07-01 05:50:29.502 # pump out diffs from after the synch point
2025-07-01 05:50:29.510 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:29.521
2025-07-01 05:50:29.529 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:29.535 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:29.540
2025-07-01 05:50:29.545 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:29.550 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:29.555 alo = 441, ahi = 1101
2025-07-01 05:50:29.560 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:29.564 blo = 441, bhi = 1101
2025-07-01 05:50:29.569
2025-07-01 05:50:29.574 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:29.579 g = []
2025-07-01 05:50:29.585 if alo < ahi:
2025-07-01 05:50:29.592 if blo < bhi:
2025-07-01 05:50:29.599 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:29.606 else:
2025-07-01 05:50:29.612 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:29.619 elif blo < bhi:
2025-07-01 05:50:29.625 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:29.632
2025-07-01 05:50:29.638 > yield from g
2025-07-01 05:50:29.649
2025-07-01 05:50:29.658 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:29.665 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:29.671
2025-07-01 05:50:29.677 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:29.682 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:29.686 alo = 441, ahi = 1101
2025-07-01 05:50:29.691 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:29.696 blo = 441, bhi = 1101
2025-07-01 05:50:29.701
2025-07-01 05:50:29.713 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:29.722 r"""
2025-07-01 05:50:29.729 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:29.736 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:29.744 synch point, and intraline difference marking is done on the
2025-07-01 05:50:29.755 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:29.764
2025-07-01 05:50:29.770 Example:
2025-07-01 05:50:29.776
2025-07-01 05:50:29.783 >>> d = Differ()
2025-07-01 05:50:29.790 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:29.801 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:29.809 >>> print(''.join(results), end="")
2025-07-01 05:50:29.816 - abcDefghiJkl
2025-07-01 05:50:29.828 + abcdefGhijkl
2025-07-01 05:50:29.845 """
2025-07-01 05:50:29.854
2025-07-01 05:50:29.861 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:29.867 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:29.874 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:29.880 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:29.887 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:29.893
2025-07-01 05:50:29.899 # search for the pair that matches best without being identical
2025-07-01 05:50:29.907 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:29.918 # on junk -- unless we have to)
2025-07-01 05:50:29.926 for j in range(blo, bhi):
2025-07-01 05:50:29.932 bj = b[j]
2025-07-01 05:50:29.939 cruncher.set_seq2(bj)
2025-07-01 05:50:29.947 for i in range(alo, ahi):
2025-07-01 05:50:29.957 ai = a[i]
2025-07-01 05:50:29.965 if ai == bj:
2025-07-01 05:50:29.971 if eqi is None:
2025-07-01 05:50:29.977 eqi, eqj = i, j
2025-07-01 05:50:29.985 continue
2025-07-01 05:50:29.998 cruncher.set_seq1(ai)
2025-07-01 05:50:30.010 # computing similarity is expensive, so use the quick
2025-07-01 05:50:30.020 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:30.032 # compares by a factor of 3.
2025-07-01 05:50:30.042 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:30.053 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:30.065 # of the computation is cached by cruncher
2025-07-01 05:50:30.077 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:30.091 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:30.102 cruncher.ratio() > best_ratio:
2025-07-01 05:50:30.115 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:30.125 if best_ratio < cutoff:
2025-07-01 05:50:30.132 # no non-identical "pretty close" pair
2025-07-01 05:50:30.144 if eqi is None:
2025-07-01 05:50:30.155 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:30.167 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:30.180 return
2025-07-01 05:50:30.195 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:30.207 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:30.219 else:
2025-07-01 05:50:30.230 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:30.239 eqi = None
2025-07-01 05:50:30.248
2025-07-01 05:50:30.260 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:30.267 # identical
2025-07-01 05:50:30.274
2025-07-01 05:50:30.280 # pump out diffs from before the synch point
2025-07-01 05:50:30.286 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:30.292
2025-07-01 05:50:30.298 # do intraline marking on the synch pair
2025-07-01 05:50:30.307 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:30.315 if eqi is None:
2025-07-01 05:50:30.322 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:30.327 atags = btags = ""
2025-07-01 05:50:30.332 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:30.337 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:30.343 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:30.348 if tag == 'replace':
2025-07-01 05:50:30.354 atags += '^' * la
2025-07-01 05:50:30.361 btags += '^' * lb
2025-07-01 05:50:30.368 elif tag == 'delete':
2025-07-01 05:50:30.374 atags += '-' * la
2025-07-01 05:50:30.384 elif tag == 'insert':
2025-07-01 05:50:30.400 btags += '+' * lb
2025-07-01 05:50:30.409 elif tag == 'equal':
2025-07-01 05:50:30.417 atags += ' ' * la
2025-07-01 05:50:30.425 btags += ' ' * lb
2025-07-01 05:50:30.430 else:
2025-07-01 05:50:30.436 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:30.441 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:30.446 else:
2025-07-01 05:50:30.452 # the synch pair is identical
2025-07-01 05:50:30.457 yield ' ' + aelt
2025-07-01 05:50:30.465
2025-07-01 05:50:30.472 # pump out diffs from after the synch point
2025-07-01 05:50:30.479 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:30.485
2025-07-01 05:50:30.491 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:30.498 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:30.509
2025-07-01 05:50:30.517 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:30.526 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:30.533 alo = 444, ahi = 1101
2025-07-01 05:50:30.545 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:30.554 blo = 444, bhi = 1101
2025-07-01 05:50:30.560
2025-07-01 05:50:30.566 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:30.573 g = []
2025-07-01 05:50:30.578 if alo < ahi:
2025-07-01 05:50:30.584 if blo < bhi:
2025-07-01 05:50:30.591 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:30.602 else:
2025-07-01 05:50:30.610 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:30.617 elif blo < bhi:
2025-07-01 05:50:30.623 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:30.629
2025-07-01 05:50:30.634 > yield from g
2025-07-01 05:50:30.640
2025-07-01 05:50:30.646 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:30.656 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:30.666
2025-07-01 05:50:30.676 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:30.688 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:30.698 alo = 444, ahi = 1101
2025-07-01 05:50:30.708 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:30.715 blo = 444, bhi = 1101
2025-07-01 05:50:30.722
2025-07-01 05:50:30.729 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:30.734 r"""
2025-07-01 05:50:30.740 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:30.746 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:30.756 synch point, and intraline difference marking is done on the
2025-07-01 05:50:30.765 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:30.772
2025-07-01 05:50:30.779 Example:
2025-07-01 05:50:30.786
2025-07-01 05:50:30.798 >>> d = Differ()
2025-07-01 05:50:30.810 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:30.822 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:30.832 >>> print(''.join(results), end="")
2025-07-01 05:50:30.839 - abcDefghiJkl
2025-07-01 05:50:30.855 + abcdefGhijkl
2025-07-01 05:50:30.877 """
2025-07-01 05:50:30.887
2025-07-01 05:50:30.899 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:30.912 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:30.925 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:30.935 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:30.943 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:30.951
2025-07-01 05:50:30.958 # search for the pair that matches best without being identical
2025-07-01 05:50:30.964 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:30.970 # on junk -- unless we have to)
2025-07-01 05:50:30.976 for j in range(blo, bhi):
2025-07-01 05:50:30.982 bj = b[j]
2025-07-01 05:50:30.988 cruncher.set_seq2(bj)
2025-07-01 05:50:30.994 for i in range(alo, ahi):
2025-07-01 05:50:31.000 ai = a[i]
2025-07-01 05:50:31.006 if ai == bj:
2025-07-01 05:50:31.015 if eqi is None:
2025-07-01 05:50:31.028 eqi, eqj = i, j
2025-07-01 05:50:31.037 continue
2025-07-01 05:50:31.048 cruncher.set_seq1(ai)
2025-07-01 05:50:31.058 # computing similarity is expensive, so use the quick
2025-07-01 05:50:31.067 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:31.079 # compares by a factor of 3.
2025-07-01 05:50:31.087 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:31.101 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:31.114 # of the computation is cached by cruncher
2025-07-01 05:50:31.128 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:31.140 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:31.152 cruncher.ratio() > best_ratio:
2025-07-01 05:50:31.163 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:31.172 if best_ratio < cutoff:
2025-07-01 05:50:31.181 # no non-identical "pretty close" pair
2025-07-01 05:50:31.194 if eqi is None:
2025-07-01 05:50:31.206 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:31.219 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:31.228 return
2025-07-01 05:50:31.237 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:31.244 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:31.250 else:
2025-07-01 05:50:31.256 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:31.263 eqi = None
2025-07-01 05:50:31.271
2025-07-01 05:50:31.278 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:31.290 # identical
2025-07-01 05:50:31.298
2025-07-01 05:50:31.306 # pump out diffs from before the synch point
2025-07-01 05:50:31.315 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:31.323
2025-07-01 05:50:31.333 # do intraline marking on the synch pair
2025-07-01 05:50:31.345 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:31.354 if eqi is None:
2025-07-01 05:50:31.366 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:31.378 atags = btags = ""
2025-07-01 05:50:31.389 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:31.399 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:31.411 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:31.419 if tag == 'replace':
2025-07-01 05:50:31.426 atags += '^' * la
2025-07-01 05:50:31.433 btags += '^' * lb
2025-07-01 05:50:31.438 elif tag == 'delete':
2025-07-01 05:50:31.443 atags += '-' * la
2025-07-01 05:50:31.447 elif tag == 'insert':
2025-07-01 05:50:31.452 btags += '+' * lb
2025-07-01 05:50:31.456 elif tag == 'equal':
2025-07-01 05:50:31.461 atags += ' ' * la
2025-07-01 05:50:31.465 btags += ' ' * lb
2025-07-01 05:50:31.469 else:
2025-07-01 05:50:31.479 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:31.491 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:31.501 else:
2025-07-01 05:50:31.513 # the synch pair is identical
2025-07-01 05:50:31.524 yield ' ' + aelt
2025-07-01 05:50:31.530
2025-07-01 05:50:31.536 # pump out diffs from after the synch point
2025-07-01 05:50:31.542 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:31.555
2025-07-01 05:50:31.562 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:31.571 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:31.578
2025-07-01 05:50:31.586 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:31.594 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:31.606 alo = 445, ahi = 1101
2025-07-01 05:50:31.617 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:31.629 blo = 445, bhi = 1101
2025-07-01 05:50:31.640
2025-07-01 05:50:31.652 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:31.665 g = []
2025-07-01 05:50:31.677 if alo < ahi:
2025-07-01 05:50:31.688 if blo < bhi:
2025-07-01 05:50:31.699 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:31.710 else:
2025-07-01 05:50:31.722 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:31.732 elif blo < bhi:
2025-07-01 05:50:31.743 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:31.755
2025-07-01 05:50:31.765 > yield from g
2025-07-01 05:50:31.773
2025-07-01 05:50:31.786 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:31.797 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:31.810
2025-07-01 05:50:31.822 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:31.837 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:31.848 alo = 445, ahi = 1101
2025-07-01 05:50:31.858 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:31.868 blo = 445, bhi = 1101
2025-07-01 05:50:31.878
2025-07-01 05:50:31.890 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:31.899 r"""
2025-07-01 05:50:31.908 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:31.916 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:31.922 synch point, and intraline difference marking is done on the
2025-07-01 05:50:31.928 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:31.934
2025-07-01 05:50:31.946 Example:
2025-07-01 05:50:31.955
2025-07-01 05:50:31.963 >>> d = Differ()
2025-07-01 05:50:31.971 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:31.981 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:31.989 >>> print(''.join(results), end="")
2025-07-01 05:50:31.996 - abcDefghiJkl
2025-07-01 05:50:32.012 + abcdefGhijkl
2025-07-01 05:50:32.033 """
2025-07-01 05:50:32.044
2025-07-01 05:50:32.055 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:32.065 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:32.074 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:32.083 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:32.095 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:32.105
2025-07-01 05:50:32.116 # search for the pair that matches best without being identical
2025-07-01 05:50:32.128 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:32.140 # on junk -- unless we have to)
2025-07-01 05:50:32.152 for j in range(blo, bhi):
2025-07-01 05:50:32.163 bj = b[j]
2025-07-01 05:50:32.174 cruncher.set_seq2(bj)
2025-07-01 05:50:32.184 for i in range(alo, ahi):
2025-07-01 05:50:32.192 ai = a[i]
2025-07-01 05:50:32.200 if ai == bj:
2025-07-01 05:50:32.213 if eqi is None:
2025-07-01 05:50:32.224 eqi, eqj = i, j
2025-07-01 05:50:32.232 continue
2025-07-01 05:50:32.240 cruncher.set_seq1(ai)
2025-07-01 05:50:32.247 # computing similarity is expensive, so use the quick
2025-07-01 05:50:32.255 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:32.266 # compares by a factor of 3.
2025-07-01 05:50:32.275 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:32.284 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:32.291 # of the computation is cached by cruncher
2025-07-01 05:50:32.298 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:32.306 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:32.317 cruncher.ratio() > best_ratio:
2025-07-01 05:50:32.328 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:32.341 if best_ratio < cutoff:
2025-07-01 05:50:32.352 # no non-identical "pretty close" pair
2025-07-01 05:50:32.362 if eqi is None:
2025-07-01 05:50:32.371 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:32.381 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:32.394 return
2025-07-01 05:50:32.404 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:32.413 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:32.421 else:
2025-07-01 05:50:32.429 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:32.434 eqi = None
2025-07-01 05:50:32.442
2025-07-01 05:50:32.454 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:32.463 # identical
2025-07-01 05:50:32.473
2025-07-01 05:50:32.485 # pump out diffs from before the synch point
2025-07-01 05:50:32.495 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:32.506
2025-07-01 05:50:32.517 # do intraline marking on the synch pair
2025-07-01 05:50:32.529 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:32.539 if eqi is None:
2025-07-01 05:50:32.549 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:32.560 atags = btags = ""
2025-07-01 05:50:32.571 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:32.583 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:32.593 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:32.601 if tag == 'replace':
2025-07-01 05:50:32.607 atags += '^' * la
2025-07-01 05:50:32.612 btags += '^' * lb
2025-07-01 05:50:32.618 elif tag == 'delete':
2025-07-01 05:50:32.622 atags += '-' * la
2025-07-01 05:50:32.627 elif tag == 'insert':
2025-07-01 05:50:32.632 btags += '+' * lb
2025-07-01 05:50:32.636 elif tag == 'equal':
2025-07-01 05:50:32.642 atags += ' ' * la
2025-07-01 05:50:32.647 btags += ' ' * lb
2025-07-01 05:50:32.653 else:
2025-07-01 05:50:32.659 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:32.666 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:32.673 else:
2025-07-01 05:50:32.680 # the synch pair is identical
2025-07-01 05:50:32.685 yield ' ' + aelt
2025-07-01 05:50:32.689
2025-07-01 05:50:32.695 # pump out diffs from after the synch point
2025-07-01 05:50:32.705 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:32.717
2025-07-01 05:50:32.727 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:32.740 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:32.752
2025-07-01 05:50:32.765 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:32.778 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:32.788 alo = 446, ahi = 1101
2025-07-01 05:50:32.800 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:32.812 blo = 446, bhi = 1101
2025-07-01 05:50:32.822
2025-07-01 05:50:32.833 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:32.842 g = []
2025-07-01 05:50:32.848 if alo < ahi:
2025-07-01 05:50:32.854 if blo < bhi:
2025-07-01 05:50:32.862 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:32.868 else:
2025-07-01 05:50:32.875 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:32.883 elif blo < bhi:
2025-07-01 05:50:32.892 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:32.899
2025-07-01 05:50:32.909 > yield from g
2025-07-01 05:50:32.920
2025-07-01 05:50:32.931 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:32.943 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:32.954
2025-07-01 05:50:32.966 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:32.979 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:32.990 alo = 446, ahi = 1101
2025-07-01 05:50:33.000 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:33.010 blo = 446, bhi = 1101
2025-07-01 05:50:33.017
2025-07-01 05:50:33.025 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:33.031 r"""
2025-07-01 05:50:33.038 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:33.051 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:33.061 synch point, and intraline difference marking is done on the
2025-07-01 05:50:33.069 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:33.075
2025-07-01 05:50:33.083 Example:
2025-07-01 05:50:33.095
2025-07-01 05:50:33.107 >>> d = Differ()
2025-07-01 05:50:33.118 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:33.126 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:33.133 >>> print(''.join(results), end="")
2025-07-01 05:50:33.139 - abcDefghiJkl
2025-07-01 05:50:33.158 + abcdefGhijkl
2025-07-01 05:50:33.168 """
2025-07-01 05:50:33.173
2025-07-01 05:50:33.179 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:33.185 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:33.192 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:33.199 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:33.205 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:33.211
2025-07-01 05:50:33.220 # search for the pair that matches best without being identical
2025-07-01 05:50:33.230 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:33.237 # on junk -- unless we have to)
2025-07-01 05:50:33.243 for j in range(blo, bhi):
2025-07-01 05:50:33.248 bj = b[j]
2025-07-01 05:50:33.254 cruncher.set_seq2(bj)
2025-07-01 05:50:33.260 for i in range(alo, ahi):
2025-07-01 05:50:33.266 ai = a[i]
2025-07-01 05:50:33.274 if ai == bj:
2025-07-01 05:50:33.282 if eqi is None:
2025-07-01 05:50:33.289 eqi, eqj = i, j
2025-07-01 05:50:33.295 continue
2025-07-01 05:50:33.301 cruncher.set_seq1(ai)
2025-07-01 05:50:33.307 # computing similarity is expensive, so use the quick
2025-07-01 05:50:33.313 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:33.319 # compares by a factor of 3.
2025-07-01 05:50:33.334 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:33.345 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:33.354 # of the computation is cached by cruncher
2025-07-01 05:50:33.361 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:33.366 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:33.372 cruncher.ratio() > best_ratio:
2025-07-01 05:50:33.376 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:33.381 if best_ratio < cutoff:
2025-07-01 05:50:33.386 # no non-identical "pretty close" pair
2025-07-01 05:50:33.391 if eqi is None:
2025-07-01 05:50:33.396 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:33.401 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:33.406 return
2025-07-01 05:50:33.411 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:33.417 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:33.423 else:
2025-07-01 05:50:33.428 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:33.434 eqi = None
2025-07-01 05:50:33.440
2025-07-01 05:50:33.447 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:33.460 # identical
2025-07-01 05:50:33.471
2025-07-01 05:50:33.482 # pump out diffs from before the synch point
2025-07-01 05:50:33.491 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:33.498
2025-07-01 05:50:33.507 # do intraline marking on the synch pair
2025-07-01 05:50:33.517 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:33.527 if eqi is None:
2025-07-01 05:50:33.537 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:33.547 atags = btags = ""
2025-07-01 05:50:33.554 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:33.562 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:33.572 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:33.583 if tag == 'replace':
2025-07-01 05:50:33.593 atags += '^' * la
2025-07-01 05:50:33.601 btags += '^' * lb
2025-07-01 05:50:33.607 elif tag == 'delete':
2025-07-01 05:50:33.612 atags += '-' * la
2025-07-01 05:50:33.617 elif tag == 'insert':
2025-07-01 05:50:33.623 btags += '+' * lb
2025-07-01 05:50:33.630 elif tag == 'equal':
2025-07-01 05:50:33.635 atags += ' ' * la
2025-07-01 05:50:33.641 btags += ' ' * lb
2025-07-01 05:50:33.646 else:
2025-07-01 05:50:33.654 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:33.660 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:33.665 else:
2025-07-01 05:50:33.670 # the synch pair is identical
2025-07-01 05:50:33.675 yield ' ' + aelt
2025-07-01 05:50:33.683
2025-07-01 05:50:33.695 # pump out diffs from after the synch point
2025-07-01 05:50:33.705 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:33.712
2025-07-01 05:50:33.719 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:33.726 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:33.731
2025-07-01 05:50:33.737 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:33.743 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:33.751 alo = 447, ahi = 1101
2025-07-01 05:50:33.764 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:33.772 blo = 447, bhi = 1101
2025-07-01 05:50:33.778
2025-07-01 05:50:33.788 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:33.798 g = []
2025-07-01 05:50:33.806 if alo < ahi:
2025-07-01 05:50:33.815 if blo < bhi:
2025-07-01 05:50:33.822 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:33.829 else:
2025-07-01 05:50:33.835 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:33.841 elif blo < bhi:
2025-07-01 05:50:33.847 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:33.853
2025-07-01 05:50:33.858 > yield from g
2025-07-01 05:50:33.868
2025-07-01 05:50:33.877 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:33.885 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:33.892
2025-07-01 05:50:33.898 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:33.905 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:33.911 alo = 447, ahi = 1101
2025-07-01 05:50:33.918 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:33.930 blo = 447, bhi = 1101
2025-07-01 05:50:33.941
2025-07-01 05:50:33.950 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:33.961 r"""
2025-07-01 05:50:33.971 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:33.979 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:33.987 synch point, and intraline difference marking is done on the
2025-07-01 05:50:33.998 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:34.010
2025-07-01 05:50:34.020 Example:
2025-07-01 05:50:34.030
2025-07-01 05:50:34.041 >>> d = Differ()
2025-07-01 05:50:34.052 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:34.061 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:34.073 >>> print(''.join(results), end="")
2025-07-01 05:50:34.083 - abcDefghiJkl
2025-07-01 05:50:34.099 + abcdefGhijkl
2025-07-01 05:50:34.111 """
2025-07-01 05:50:34.120
2025-07-01 05:50:34.131 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:34.139 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:34.145 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:34.150 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:34.155 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:34.160
2025-07-01 05:50:34.165 # search for the pair that matches best without being identical
2025-07-01 05:50:34.170 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:34.179 # on junk -- unless we have to)
2025-07-01 05:50:34.191 for j in range(blo, bhi):
2025-07-01 05:50:34.201 bj = b[j]
2025-07-01 05:50:34.209 cruncher.set_seq2(bj)
2025-07-01 05:50:34.217 for i in range(alo, ahi):
2025-07-01 05:50:34.223 ai = a[i]
2025-07-01 05:50:34.229 if ai == bj:
2025-07-01 05:50:34.234 if eqi is None:
2025-07-01 05:50:34.240 eqi, eqj = i, j
2025-07-01 05:50:34.246 continue
2025-07-01 05:50:34.252 cruncher.set_seq1(ai)
2025-07-01 05:50:34.259 # computing similarity is expensive, so use the quick
2025-07-01 05:50:34.265 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:34.275 # compares by a factor of 3.
2025-07-01 05:50:34.286 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:34.295 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:34.303 # of the computation is cached by cruncher
2025-07-01 05:50:34.311 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:34.322 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:34.330 cruncher.ratio() > best_ratio:
2025-07-01 05:50:34.341 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:34.348 if best_ratio < cutoff:
2025-07-01 05:50:34.355 # no non-identical "pretty close" pair
2025-07-01 05:50:34.363 if eqi is None:
2025-07-01 05:50:34.371 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:34.384 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:34.391 return
2025-07-01 05:50:34.399 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:34.405 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:34.410 else:
2025-07-01 05:50:34.421 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:34.429 eqi = None
2025-07-01 05:50:34.437
2025-07-01 05:50:34.446 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:34.457 # identical
2025-07-01 05:50:34.467
2025-07-01 05:50:34.479 # pump out diffs from before the synch point
2025-07-01 05:50:34.486 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:34.492
2025-07-01 05:50:34.498 # do intraline marking on the synch pair
2025-07-01 05:50:34.503 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:34.510 if eqi is None:
2025-07-01 05:50:34.518 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:34.529 atags = btags = ""
2025-07-01 05:50:34.539 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:34.547 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:34.555 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:34.563 if tag == 'replace':
2025-07-01 05:50:34.574 atags += '^' * la
2025-07-01 05:50:34.582 btags += '^' * lb
2025-07-01 05:50:34.589 elif tag == 'delete':
2025-07-01 05:50:34.598 atags += '-' * la
2025-07-01 05:50:34.604 elif tag == 'insert':
2025-07-01 05:50:34.610 btags += '+' * lb
2025-07-01 05:50:34.617 elif tag == 'equal':
2025-07-01 05:50:34.623 atags += ' ' * la
2025-07-01 05:50:34.629 btags += ' ' * lb
2025-07-01 05:50:34.633 else:
2025-07-01 05:50:34.638 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:34.643 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:34.647 else:
2025-07-01 05:50:34.652 # the synch pair is identical
2025-07-01 05:50:34.657 yield ' ' + aelt
2025-07-01 05:50:34.663
2025-07-01 05:50:34.669 # pump out diffs from after the synch point
2025-07-01 05:50:34.674 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:34.680
2025-07-01 05:50:34.685 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:34.691 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:34.697
2025-07-01 05:50:34.702 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:34.708 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:34.714 alo = 448, ahi = 1101
2025-07-01 05:50:34.721 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:34.727 blo = 448, bhi = 1101
2025-07-01 05:50:34.734
2025-07-01 05:50:34.744 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:34.753 g = []
2025-07-01 05:50:34.760 if alo < ahi:
2025-07-01 05:50:34.767 if blo < bhi:
2025-07-01 05:50:34.772 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:34.777 else:
2025-07-01 05:50:34.782 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:34.788 elif blo < bhi:
2025-07-01 05:50:34.794 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:34.800
2025-07-01 05:50:34.805 > yield from g
2025-07-01 05:50:34.811
2025-07-01 05:50:34.817 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:34.823 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:34.829
2025-07-01 05:50:34.835 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:34.842 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:34.854 alo = 448, ahi = 1101
2025-07-01 05:50:34.864 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:34.872 blo = 448, bhi = 1101
2025-07-01 05:50:34.879
2025-07-01 05:50:34.887 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:34.896 r"""
2025-07-01 05:50:34.903 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:34.908 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:34.914 synch point, and intraline difference marking is done on the
2025-07-01 05:50:34.918 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:34.923
2025-07-01 05:50:34.928 Example:
2025-07-01 05:50:34.941
2025-07-01 05:50:34.953 >>> d = Differ()
2025-07-01 05:50:34.966 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:34.976 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:34.984 >>> print(''.join(results), end="")
2025-07-01 05:50:34.991 - abcDefghiJkl
2025-07-01 05:50:35.007 + abcdefGhijkl
2025-07-01 05:50:35.028 """
2025-07-01 05:50:35.042
2025-07-01 05:50:35.054 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:35.064 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:35.076 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:35.089 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:35.098 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:35.105
2025-07-01 05:50:35.112 # search for the pair that matches best without being identical
2025-07-01 05:50:35.119 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:35.124 # on junk -- unless we have to)
2025-07-01 05:50:35.130 for j in range(blo, bhi):
2025-07-01 05:50:35.140 bj = b[j]
2025-07-01 05:50:35.149 cruncher.set_seq2(bj)
2025-07-01 05:50:35.156 for i in range(alo, ahi):
2025-07-01 05:50:35.162 ai = a[i]
2025-07-01 05:50:35.169 if ai == bj:
2025-07-01 05:50:35.175 if eqi is None:
2025-07-01 05:50:35.180 eqi, eqj = i, j
2025-07-01 05:50:35.185 continue
2025-07-01 05:50:35.190 cruncher.set_seq1(ai)
2025-07-01 05:50:35.196 # computing similarity is expensive, so use the quick
2025-07-01 05:50:35.203 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:35.211 # compares by a factor of 3.
2025-07-01 05:50:35.222 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:35.232 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:35.240 # of the computation is cached by cruncher
2025-07-01 05:50:35.246 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:35.253 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:35.259 cruncher.ratio() > best_ratio:
2025-07-01 05:50:35.267 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:35.274 if best_ratio < cutoff:
2025-07-01 05:50:35.281 # no non-identical "pretty close" pair
2025-07-01 05:50:35.287 if eqi is None:
2025-07-01 05:50:35.293 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:35.299 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:35.304 return
2025-07-01 05:50:35.311 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:35.319 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:35.326 else:
2025-07-01 05:50:35.334 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:35.344 eqi = None
2025-07-01 05:50:35.354
2025-07-01 05:50:35.362 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:35.373 # identical
2025-07-01 05:50:35.381
2025-07-01 05:50:35.392 # pump out diffs from before the synch point
2025-07-01 05:50:35.401 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:35.409
2025-07-01 05:50:35.420 # do intraline marking on the synch pair
2025-07-01 05:50:35.429 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:35.437 if eqi is None:
2025-07-01 05:50:35.444 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:35.450 atags = btags = ""
2025-07-01 05:50:35.456 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:35.462 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:35.473 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:35.482 if tag == 'replace':
2025-07-01 05:50:35.489 atags += '^' * la
2025-07-01 05:50:35.496 btags += '^' * lb
2025-07-01 05:50:35.502 elif tag == 'delete':
2025-07-01 05:50:35.509 atags += '-' * la
2025-07-01 05:50:35.522 elif tag == 'insert':
2025-07-01 05:50:35.532 btags += '+' * lb
2025-07-01 05:50:35.540 elif tag == 'equal':
2025-07-01 05:50:35.547 atags += ' ' * la
2025-07-01 05:50:35.556 btags += ' ' * lb
2025-07-01 05:50:35.567 else:
2025-07-01 05:50:35.576 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:35.583 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:35.590 else:
2025-07-01 05:50:35.600 # the synch pair is identical
2025-07-01 05:50:35.609 yield ' ' + aelt
2025-07-01 05:50:35.618
2025-07-01 05:50:35.623 # pump out diffs from after the synch point
2025-07-01 05:50:35.629 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:35.635
2025-07-01 05:50:35.641 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:35.647 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:35.653
2025-07-01 05:50:35.658 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:35.664 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:35.670 alo = 449, ahi = 1101
2025-07-01 05:50:35.685 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:35.697 blo = 449, bhi = 1101
2025-07-01 05:50:35.705
2025-07-01 05:50:35.712 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:35.718 g = []
2025-07-01 05:50:35.726 if alo < ahi:
2025-07-01 05:50:35.732 if blo < bhi:
2025-07-01 05:50:35.738 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:35.746 else:
2025-07-01 05:50:35.752 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:35.758 elif blo < bhi:
2025-07-01 05:50:35.763 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:35.768
2025-07-01 05:50:35.773 > yield from g
2025-07-01 05:50:35.779
2025-07-01 05:50:35.785 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:35.791 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:35.797
2025-07-01 05:50:35.802 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:35.809 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:35.815 alo = 449, ahi = 1101
2025-07-01 05:50:35.823 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:35.833 blo = 449, bhi = 1101
2025-07-01 05:50:35.842
2025-07-01 05:50:35.853 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:35.861 r"""
2025-07-01 05:50:35.870 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:35.879 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:35.886 synch point, and intraline difference marking is done on the
2025-07-01 05:50:35.892 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:35.897
2025-07-01 05:50:35.902 Example:
2025-07-01 05:50:35.907
2025-07-01 05:50:35.913 >>> d = Differ()
2025-07-01 05:50:35.918 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:35.927 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:35.939 >>> print(''.join(results), end="")
2025-07-01 05:50:35.948 - abcDefghiJkl
2025-07-01 05:50:35.964 + abcdefGhijkl
2025-07-01 05:50:35.980 """
2025-07-01 05:50:35.991
2025-07-01 05:50:36.000 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:36.009 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:36.016 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:36.023 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:36.033 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:36.043
2025-07-01 05:50:36.051 # search for the pair that matches best without being identical
2025-07-01 05:50:36.056 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:36.061 # on junk -- unless we have to)
2025-07-01 05:50:36.065 for j in range(blo, bhi):
2025-07-01 05:50:36.070 bj = b[j]
2025-07-01 05:50:36.082 cruncher.set_seq2(bj)
2025-07-01 05:50:36.090 for i in range(alo, ahi):
2025-07-01 05:50:36.096 ai = a[i]
2025-07-01 05:50:36.102 if ai == bj:
2025-07-01 05:50:36.107 if eqi is None:
2025-07-01 05:50:36.113 eqi, eqj = i, j
2025-07-01 05:50:36.119 continue
2025-07-01 05:50:36.127 cruncher.set_seq1(ai)
2025-07-01 05:50:36.135 # computing similarity is expensive, so use the quick
2025-07-01 05:50:36.142 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:36.147 # compares by a factor of 3.
2025-07-01 05:50:36.153 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:36.157 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:36.162 # of the computation is cached by cruncher
2025-07-01 05:50:36.167 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:36.171 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:36.176 cruncher.ratio() > best_ratio:
2025-07-01 05:50:36.181 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:36.186 if best_ratio < cutoff:
2025-07-01 05:50:36.191 # no non-identical "pretty close" pair
2025-07-01 05:50:36.197 if eqi is None:
2025-07-01 05:50:36.202 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:36.209 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:36.215 return
2025-07-01 05:50:36.222 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:36.231 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:36.242 else:
2025-07-01 05:50:36.250 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:36.256 eqi = None
2025-07-01 05:50:36.265
2025-07-01 05:50:36.274 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:36.283 # identical
2025-07-01 05:50:36.291
2025-07-01 05:50:36.297 # pump out diffs from before the synch point
2025-07-01 05:50:36.304 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:36.310
2025-07-01 05:50:36.317 # do intraline marking on the synch pair
2025-07-01 05:50:36.323 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:36.328 if eqi is None:
2025-07-01 05:50:36.333 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:36.339 atags = btags = ""
2025-07-01 05:50:36.347 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:36.362 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:36.375 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:36.382 if tag == 'replace':
2025-07-01 05:50:36.391 atags += '^' * la
2025-07-01 05:50:36.399 btags += '^' * lb
2025-07-01 05:50:36.412 elif tag == 'delete':
2025-07-01 05:50:36.423 atags += '-' * la
2025-07-01 05:50:36.435 elif tag == 'insert':
2025-07-01 05:50:36.445 btags += '+' * lb
2025-07-01 05:50:36.456 elif tag == 'equal':
2025-07-01 05:50:36.463 atags += ' ' * la
2025-07-01 05:50:36.470 btags += ' ' * lb
2025-07-01 05:50:36.481 else:
2025-07-01 05:50:36.493 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:36.502 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:36.509 else:
2025-07-01 05:50:36.521 # the synch pair is identical
2025-07-01 05:50:36.535 yield ' ' + aelt
2025-07-01 05:50:36.547
2025-07-01 05:50:36.557 # pump out diffs from after the synch point
2025-07-01 05:50:36.566 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:36.573
2025-07-01 05:50:36.580 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:36.590 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:36.600
2025-07-01 05:50:36.609 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:36.618 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:36.623 alo = 450, ahi = 1101
2025-07-01 05:50:36.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:36.641 blo = 450, bhi = 1101
2025-07-01 05:50:36.649
2025-07-01 05:50:36.656 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:36.662 g = []
2025-07-01 05:50:36.672 if alo < ahi:
2025-07-01 05:50:36.681 if blo < bhi:
2025-07-01 05:50:36.689 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:36.696 else:
2025-07-01 05:50:36.702 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:36.712 elif blo < bhi:
2025-07-01 05:50:36.721 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:36.729
2025-07-01 05:50:36.735 > yield from g
2025-07-01 05:50:36.745
2025-07-01 05:50:36.757 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:36.766 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:36.774
2025-07-01 05:50:36.780 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:36.786 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:36.791 alo = 450, ahi = 1101
2025-07-01 05:50:36.797 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:36.802 blo = 450, bhi = 1101
2025-07-01 05:50:36.812
2025-07-01 05:50:36.825 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:36.833 r"""
2025-07-01 05:50:36.843 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:36.850 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:36.863 synch point, and intraline difference marking is done on the
2025-07-01 05:50:36.872 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:36.880
2025-07-01 05:50:36.887 Example:
2025-07-01 05:50:36.895
2025-07-01 05:50:36.907 >>> d = Differ()
2025-07-01 05:50:36.917 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:36.927 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:36.940 >>> print(''.join(results), end="")
2025-07-01 05:50:36.950 - abcDefghiJkl
2025-07-01 05:50:36.972 + abcdefGhijkl
2025-07-01 05:50:36.985 """
2025-07-01 05:50:36.991
2025-07-01 05:50:36.998 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:37.004 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:37.010 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:37.021 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:37.029 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:37.035
2025-07-01 05:50:37.041 # search for the pair that matches best without being identical
2025-07-01 05:50:37.046 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:37.051 # on junk -- unless we have to)
2025-07-01 05:50:37.058 for j in range(blo, bhi):
2025-07-01 05:50:37.065 bj = b[j]
2025-07-01 05:50:37.071 cruncher.set_seq2(bj)
2025-07-01 05:50:37.077 for i in range(alo, ahi):
2025-07-01 05:50:37.082 ai = a[i]
2025-07-01 05:50:37.088 if ai == bj:
2025-07-01 05:50:37.095 if eqi is None:
2025-07-01 05:50:37.106 eqi, eqj = i, j
2025-07-01 05:50:37.114 continue
2025-07-01 05:50:37.121 cruncher.set_seq1(ai)
2025-07-01 05:50:37.127 # computing similarity is expensive, so use the quick
2025-07-01 05:50:37.133 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:37.139 # compares by a factor of 3.
2025-07-01 05:50:37.146 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:37.157 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:37.165 # of the computation is cached by cruncher
2025-07-01 05:50:37.172 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:37.179 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:37.188 cruncher.ratio() > best_ratio:
2025-07-01 05:50:37.197 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:37.204 if best_ratio < cutoff:
2025-07-01 05:50:37.211 # no non-identical "pretty close" pair
2025-07-01 05:50:37.217 if eqi is None:
2025-07-01 05:50:37.223 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:37.229 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:37.234 return
2025-07-01 05:50:37.245 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:37.253 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:37.262 else:
2025-07-01 05:50:37.270 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:37.280 eqi = None
2025-07-01 05:50:37.290
2025-07-01 05:50:37.299 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:37.305 # identical
2025-07-01 05:50:37.311
2025-07-01 05:50:37.317 # pump out diffs from before the synch point
2025-07-01 05:50:37.324 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:37.331
2025-07-01 05:50:37.339 # do intraline marking on the synch pair
2025-07-01 05:50:37.349 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:37.357 if eqi is None:
2025-07-01 05:50:37.364 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:37.369 atags = btags = ""
2025-07-01 05:50:37.374 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:37.379 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:37.384 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:37.390 if tag == 'replace':
2025-07-01 05:50:37.395 atags += '^' * la
2025-07-01 05:50:37.402 btags += '^' * lb
2025-07-01 05:50:37.413 elif tag == 'delete':
2025-07-01 05:50:37.422 atags += '-' * la
2025-07-01 05:50:37.430 elif tag == 'insert':
2025-07-01 05:50:37.440 btags += '+' * lb
2025-07-01 05:50:37.450 elif tag == 'equal':
2025-07-01 05:50:37.458 atags += ' ' * la
2025-07-01 05:50:37.466 btags += ' ' * lb
2025-07-01 05:50:37.475 else:
2025-07-01 05:50:37.483 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:37.490 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:37.500 else:
2025-07-01 05:50:37.510 # the synch pair is identical
2025-07-01 05:50:37.518 yield ' ' + aelt
2025-07-01 05:50:37.529
2025-07-01 05:50:37.542 # pump out diffs from after the synch point
2025-07-01 05:50:37.555 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:37.563
2025-07-01 05:50:37.571 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:37.579 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:37.586
2025-07-01 05:50:37.597 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:37.606 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:37.613 alo = 451, ahi = 1101
2025-07-01 05:50:37.620 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:37.625 blo = 451, bhi = 1101
2025-07-01 05:50:37.631
2025-07-01 05:50:37.637 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:37.644 g = []
2025-07-01 05:50:37.650 if alo < ahi:
2025-07-01 05:50:37.656 if blo < bhi:
2025-07-01 05:50:37.661 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:37.666 else:
2025-07-01 05:50:37.672 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:37.677 elif blo < bhi:
2025-07-01 05:50:37.682 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:37.687
2025-07-01 05:50:37.691 > yield from g
2025-07-01 05:50:37.699
2025-07-01 05:50:37.705 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:37.710 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:37.716
2025-07-01 05:50:37.723 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:37.730 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:37.740 alo = 451, ahi = 1101
2025-07-01 05:50:37.749 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:37.756 blo = 451, bhi = 1101
2025-07-01 05:50:37.763
2025-07-01 05:50:37.768 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:37.772 r"""
2025-07-01 05:50:37.777 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:37.783 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:37.789 synch point, and intraline difference marking is done on the
2025-07-01 05:50:37.795 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:37.801
2025-07-01 05:50:37.807 Example:
2025-07-01 05:50:37.815
2025-07-01 05:50:37.825 >>> d = Differ()
2025-07-01 05:50:37.833 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:37.840 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:37.845 >>> print(''.join(results), end="")
2025-07-01 05:50:37.850 - abcDefghiJkl
2025-07-01 05:50:37.864 + abcdefGhijkl
2025-07-01 05:50:37.885 """
2025-07-01 05:50:37.893
2025-07-01 05:50:37.900 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:37.907 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:37.914 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:37.925 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:37.936 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:37.947
2025-07-01 05:50:37.957 # search for the pair that matches best without being identical
2025-07-01 05:50:37.970 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:37.980 # on junk -- unless we have to)
2025-07-01 05:50:37.989 for j in range(blo, bhi):
2025-07-01 05:50:38.001 bj = b[j]
2025-07-01 05:50:38.013 cruncher.set_seq2(bj)
2025-07-01 05:50:38.021 for i in range(alo, ahi):
2025-07-01 05:50:38.029 ai = a[i]
2025-07-01 05:50:38.035 if ai == bj:
2025-07-01 05:50:38.042 if eqi is None:
2025-07-01 05:50:38.054 eqi, eqj = i, j
2025-07-01 05:50:38.063 continue
2025-07-01 05:50:38.071 cruncher.set_seq1(ai)
2025-07-01 05:50:38.078 # computing similarity is expensive, so use the quick
2025-07-01 05:50:38.088 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:38.100 # compares by a factor of 3.
2025-07-01 05:50:38.109 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:38.116 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:38.123 # of the computation is cached by cruncher
2025-07-01 05:50:38.131 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:38.143 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:38.152 cruncher.ratio() > best_ratio:
2025-07-01 05:50:38.160 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:38.167 if best_ratio < cutoff:
2025-07-01 05:50:38.175 # no non-identical "pretty close" pair
2025-07-01 05:50:38.182 if eqi is None:
2025-07-01 05:50:38.188 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:38.195 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:38.203 return
2025-07-01 05:50:38.215 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:38.229 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:38.240 else:
2025-07-01 05:50:38.252 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:38.264 eqi = None
2025-07-01 05:50:38.273
2025-07-01 05:50:38.281 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:38.288 # identical
2025-07-01 05:50:38.294
2025-07-01 05:50:38.301 # pump out diffs from before the synch point
2025-07-01 05:50:38.308 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:38.315
2025-07-01 05:50:38.326 # do intraline marking on the synch pair
2025-07-01 05:50:38.335 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:38.343 if eqi is None:
2025-07-01 05:50:38.354 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:38.365 atags = btags = ""
2025-07-01 05:50:38.372 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:38.387 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:38.400 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:38.409 if tag == 'replace':
2025-07-01 05:50:38.422 atags += '^' * la
2025-07-01 05:50:38.433 btags += '^' * lb
2025-07-01 05:50:38.441 elif tag == 'delete':
2025-07-01 05:50:38.451 atags += '-' * la
2025-07-01 05:50:38.462 elif tag == 'insert':
2025-07-01 05:50:38.472 btags += '+' * lb
2025-07-01 05:50:38.481 elif tag == 'equal':
2025-07-01 05:50:38.488 atags += ' ' * la
2025-07-01 05:50:38.495 btags += ' ' * lb
2025-07-01 05:50:38.501 else:
2025-07-01 05:50:38.514 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:38.523 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:38.530 else:
2025-07-01 05:50:38.537 # the synch pair is identical
2025-07-01 05:50:38.544 yield ' ' + aelt
2025-07-01 05:50:38.553
2025-07-01 05:50:38.561 # pump out diffs from after the synch point
2025-07-01 05:50:38.569 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:38.575
2025-07-01 05:50:38.582 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:38.592 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:38.606
2025-07-01 05:50:38.616 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:38.627 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:38.638 alo = 452, ahi = 1101
2025-07-01 05:50:38.648 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:38.655 blo = 452, bhi = 1101
2025-07-01 05:50:38.662
2025-07-01 05:50:38.669 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:38.675 g = []
2025-07-01 05:50:38.683 if alo < ahi:
2025-07-01 05:50:38.694 if blo < bhi:
2025-07-01 05:50:38.702 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:38.709 else:
2025-07-01 05:50:38.716 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:38.722 elif blo < bhi:
2025-07-01 05:50:38.729 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:38.736
2025-07-01 05:50:38.743 > yield from g
2025-07-01 05:50:38.750
2025-07-01 05:50:38.755 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:38.760 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:38.767
2025-07-01 05:50:38.776 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:38.784 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:38.790 alo = 452, ahi = 1101
2025-07-01 05:50:38.799 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:38.804 blo = 452, bhi = 1101
2025-07-01 05:50:38.810
2025-07-01 05:50:38.820 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:38.831 r"""
2025-07-01 05:50:38.844 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:38.854 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:38.863 synch point, and intraline difference marking is done on the
2025-07-01 05:50:38.871 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:38.883
2025-07-01 05:50:38.894 Example:
2025-07-01 05:50:38.905
2025-07-01 05:50:38.915 >>> d = Differ()
2025-07-01 05:50:38.924 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:38.931 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:38.937 >>> print(''.join(results), end="")
2025-07-01 05:50:38.948 - abcDefghiJkl
2025-07-01 05:50:38.970 + abcdefGhijkl
2025-07-01 05:50:38.992 """
2025-07-01 05:50:39.004
2025-07-01 05:50:39.015 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:39.026 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:39.034 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:39.042 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:39.052 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:39.063
2025-07-01 05:50:39.073 # search for the pair that matches best without being identical
2025-07-01 05:50:39.087 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:39.094 # on junk -- unless we have to)
2025-07-01 05:50:39.100 for j in range(blo, bhi):
2025-07-01 05:50:39.107 bj = b[j]
2025-07-01 05:50:39.118 cruncher.set_seq2(bj)
2025-07-01 05:50:39.126 for i in range(alo, ahi):
2025-07-01 05:50:39.133 ai = a[i]
2025-07-01 05:50:39.143 if ai == bj:
2025-07-01 05:50:39.153 if eqi is None:
2025-07-01 05:50:39.160 eqi, eqj = i, j
2025-07-01 05:50:39.171 continue
2025-07-01 05:50:39.178 cruncher.set_seq1(ai)
2025-07-01 05:50:39.184 # computing similarity is expensive, so use the quick
2025-07-01 05:50:39.190 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:39.196 # compares by a factor of 3.
2025-07-01 05:50:39.202 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:39.208 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:39.214 # of the computation is cached by cruncher
2025-07-01 05:50:39.220 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:39.227 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:39.233 cruncher.ratio() > best_ratio:
2025-07-01 05:50:39.244 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:39.254 if best_ratio < cutoff:
2025-07-01 05:50:39.261 # no non-identical "pretty close" pair
2025-07-01 05:50:39.267 if eqi is None:
2025-07-01 05:50:39.273 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:39.280 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:39.287 return
2025-07-01 05:50:39.295 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:39.306 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:39.314 else:
2025-07-01 05:50:39.321 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:39.327 eqi = None
2025-07-01 05:50:39.332
2025-07-01 05:50:39.341 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:39.352 # identical
2025-07-01 05:50:39.362
2025-07-01 05:50:39.371 # pump out diffs from before the synch point
2025-07-01 05:50:39.381 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:39.388
2025-07-01 05:50:39.395 # do intraline marking on the synch pair
2025-07-01 05:50:39.402 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:39.407 if eqi is None:
2025-07-01 05:50:39.413 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:39.419 atags = btags = ""
2025-07-01 05:50:39.423 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:39.428 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:39.433 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:39.438 if tag == 'replace':
2025-07-01 05:50:39.449 atags += '^' * la
2025-07-01 05:50:39.457 btags += '^' * lb
2025-07-01 05:50:39.464 elif tag == 'delete':
2025-07-01 05:50:39.471 atags += '-' * la
2025-07-01 05:50:39.477 elif tag == 'insert':
2025-07-01 05:50:39.484 btags += '+' * lb
2025-07-01 05:50:39.491 elif tag == 'equal':
2025-07-01 05:50:39.504 atags += ' ' * la
2025-07-01 05:50:39.515 btags += ' ' * lb
2025-07-01 05:50:39.524 else:
2025-07-01 05:50:39.531 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:39.541 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:39.551 else:
2025-07-01 05:50:39.562 # the synch pair is identical
2025-07-01 05:50:39.572 yield ' ' + aelt
2025-07-01 05:50:39.580
2025-07-01 05:50:39.590 # pump out diffs from after the synch point
2025-07-01 05:50:39.600 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:39.610
2025-07-01 05:50:39.617 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:39.625 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:39.631
2025-07-01 05:50:39.639 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:39.650 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:39.658 alo = 453, ahi = 1101
2025-07-01 05:50:39.665 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:39.670 blo = 453, bhi = 1101
2025-07-01 05:50:39.675
2025-07-01 05:50:39.681 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:39.687 g = []
2025-07-01 05:50:39.693 if alo < ahi:
2025-07-01 05:50:39.699 if blo < bhi:
2025-07-01 05:50:39.706 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:39.713 else:
2025-07-01 05:50:39.724 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:39.734 elif blo < bhi:
2025-07-01 05:50:39.741 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:39.747
2025-07-01 05:50:39.752 > yield from g
2025-07-01 05:50:39.758
2025-07-01 05:50:39.763 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:39.768 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:39.774
2025-07-01 05:50:39.780 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:39.787 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:39.794 alo = 453, ahi = 1101
2025-07-01 05:50:39.804 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:39.816 blo = 453, bhi = 1101
2025-07-01 05:50:39.824
2025-07-01 05:50:39.831 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:39.841 r"""
2025-07-01 05:50:39.850 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:39.857 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:39.868 synch point, and intraline difference marking is done on the
2025-07-01 05:50:39.878 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:39.886
2025-07-01 05:50:39.893 Example:
2025-07-01 05:50:39.899
2025-07-01 05:50:39.906 >>> d = Differ()
2025-07-01 05:50:39.914 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:39.921 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:39.927 >>> print(''.join(results), end="")
2025-07-01 05:50:39.934 - abcDefghiJkl
2025-07-01 05:50:39.952 + abcdefGhijkl
2025-07-01 05:50:39.965 """
2025-07-01 05:50:39.972
2025-07-01 05:50:39.980 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:39.987 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:39.997 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:40.008 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:40.020 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:40.028
2025-07-01 05:50:40.034 # search for the pair that matches best without being identical
2025-07-01 05:50:40.040 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:40.048 # on junk -- unless we have to)
2025-07-01 05:50:40.060 for j in range(blo, bhi):
2025-07-01 05:50:40.069 bj = b[j]
2025-07-01 05:50:40.081 cruncher.set_seq2(bj)
2025-07-01 05:50:40.092 for i in range(alo, ahi):
2025-07-01 05:50:40.100 ai = a[i]
2025-07-01 05:50:40.108 if ai == bj:
2025-07-01 05:50:40.114 if eqi is None:
2025-07-01 05:50:40.120 eqi, eqj = i, j
2025-07-01 05:50:40.126 continue
2025-07-01 05:50:40.132 cruncher.set_seq1(ai)
2025-07-01 05:50:40.140 # computing similarity is expensive, so use the quick
2025-07-01 05:50:40.149 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:40.155 # compares by a factor of 3.
2025-07-01 05:50:40.161 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:40.167 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:40.173 # of the computation is cached by cruncher
2025-07-01 05:50:40.180 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:40.186 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:40.191 cruncher.ratio() > best_ratio:
2025-07-01 05:50:40.198 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:40.203 if best_ratio < cutoff:
2025-07-01 05:50:40.208 # no non-identical "pretty close" pair
2025-07-01 05:50:40.214 if eqi is None:
2025-07-01 05:50:40.224 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:40.233 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:40.241 return
2025-07-01 05:50:40.249 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:40.255 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:40.263 else:
2025-07-01 05:50:40.273 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:40.284 eqi = None
2025-07-01 05:50:40.295
2025-07-01 05:50:40.307 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:40.317 # identical
2025-07-01 05:50:40.328
2025-07-01 05:50:40.339 # pump out diffs from before the synch point
2025-07-01 05:50:40.348 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:40.355
2025-07-01 05:50:40.362 # do intraline marking on the synch pair
2025-07-01 05:50:40.368 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:40.377 if eqi is None:
2025-07-01 05:50:40.388 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:40.396 atags = btags = ""
2025-07-01 05:50:40.402 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:40.406 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:40.411 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:40.415 if tag == 'replace':
2025-07-01 05:50:40.419 atags += '^' * la
2025-07-01 05:50:40.424 btags += '^' * lb
2025-07-01 05:50:40.428 elif tag == 'delete':
2025-07-01 05:50:40.432 atags += '-' * la
2025-07-01 05:50:40.437 elif tag == 'insert':
2025-07-01 05:50:40.441 btags += '+' * lb
2025-07-01 05:50:40.447 elif tag == 'equal':
2025-07-01 05:50:40.453 atags += ' ' * la
2025-07-01 05:50:40.460 btags += ' ' * lb
2025-07-01 05:50:40.466 else:
2025-07-01 05:50:40.472 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:40.476 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:40.480 else:
2025-07-01 05:50:40.485 # the synch pair is identical
2025-07-01 05:50:40.496 yield ' ' + aelt
2025-07-01 05:50:40.506
2025-07-01 05:50:40.514 # pump out diffs from after the synch point
2025-07-01 05:50:40.522 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:40.532
2025-07-01 05:50:40.542 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:40.552 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:40.562
2025-07-01 05:50:40.571 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:40.579 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:40.586 alo = 454, ahi = 1101
2025-07-01 05:50:40.593 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:40.599 blo = 454, bhi = 1101
2025-07-01 05:50:40.604
2025-07-01 05:50:40.610 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:40.620 g = []
2025-07-01 05:50:40.629 if alo < ahi:
2025-07-01 05:50:40.637 if blo < bhi:
2025-07-01 05:50:40.643 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:40.650 else:
2025-07-01 05:50:40.656 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:40.662 elif blo < bhi:
2025-07-01 05:50:40.668 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:40.675
2025-07-01 05:50:40.687 > yield from g
2025-07-01 05:50:40.698
2025-07-01 05:50:40.709 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:40.721 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:40.732
2025-07-01 05:50:40.742 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:40.755 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:40.764 alo = 454, ahi = 1101
2025-07-01 05:50:40.778 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:40.786 blo = 454, bhi = 1101
2025-07-01 05:50:40.792
2025-07-01 05:50:40.802 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:40.815 r"""
2025-07-01 05:50:40.828 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:40.839 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:40.847 synch point, and intraline difference marking is done on the
2025-07-01 05:50:40.861 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:40.872
2025-07-01 05:50:40.881 Example:
2025-07-01 05:50:40.889
2025-07-01 05:50:40.897 >>> d = Differ()
2025-07-01 05:50:40.904 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:40.911 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:40.918 >>> print(''.join(results), end="")
2025-07-01 05:50:40.925 - abcDefghiJkl
2025-07-01 05:50:40.939 + abcdefGhijkl
2025-07-01 05:50:40.961 """
2025-07-01 05:50:40.972
2025-07-01 05:50:40.983 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:40.996 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:41.009 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:41.019 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:41.029 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:41.040
2025-07-01 05:50:41.050 # search for the pair that matches best without being identical
2025-07-01 05:50:41.057 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:41.064 # on junk -- unless we have to)
2025-07-01 05:50:41.071 for j in range(blo, bhi):
2025-07-01 05:50:41.077 bj = b[j]
2025-07-01 05:50:41.083 cruncher.set_seq2(bj)
2025-07-01 05:50:41.090 for i in range(alo, ahi):
2025-07-01 05:50:41.102 ai = a[i]
2025-07-01 05:50:41.113 if ai == bj:
2025-07-01 05:50:41.122 if eqi is None:
2025-07-01 05:50:41.132 eqi, eqj = i, j
2025-07-01 05:50:41.142 continue
2025-07-01 05:50:41.151 cruncher.set_seq1(ai)
2025-07-01 05:50:41.159 # computing similarity is expensive, so use the quick
2025-07-01 05:50:41.171 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:41.180 # compares by a factor of 3.
2025-07-01 05:50:41.187 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:41.194 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:41.201 # of the computation is cached by cruncher
2025-07-01 05:50:41.213 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:41.222 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:41.231 cruncher.ratio() > best_ratio:
2025-07-01 05:50:41.242 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:41.251 if best_ratio < cutoff:
2025-07-01 05:50:41.259 # no non-identical "pretty close" pair
2025-07-01 05:50:41.267 if eqi is None:
2025-07-01 05:50:41.278 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:41.287 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:41.295 return
2025-07-01 05:50:41.309 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:41.323 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:41.331 else:
2025-07-01 05:50:41.339 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:41.345 eqi = None
2025-07-01 05:50:41.352
2025-07-01 05:50:41.359 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:41.365 # identical
2025-07-01 05:50:41.371
2025-07-01 05:50:41.379 # pump out diffs from before the synch point
2025-07-01 05:50:41.393 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:41.403
2025-07-01 05:50:41.412 # do intraline marking on the synch pair
2025-07-01 05:50:41.421 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:41.435 if eqi is None:
2025-07-01 05:50:41.445 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:41.457 atags = btags = ""
2025-07-01 05:50:41.469 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:41.479 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:41.491 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:41.501 if tag == 'replace':
2025-07-01 05:50:41.514 atags += '^' * la
2025-07-01 05:50:41.527 btags += '^' * lb
2025-07-01 05:50:41.538 elif tag == 'delete':
2025-07-01 05:50:41.547 atags += '-' * la
2025-07-01 05:50:41.555 elif tag == 'insert':
2025-07-01 05:50:41.562 btags += '+' * lb
2025-07-01 05:50:41.568 elif tag == 'equal':
2025-07-01 05:50:41.576 atags += ' ' * la
2025-07-01 05:50:41.590 btags += ' ' * lb
2025-07-01 05:50:41.602 else:
2025-07-01 05:50:41.615 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:41.627 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:41.635 else:
2025-07-01 05:50:41.643 # the synch pair is identical
2025-07-01 05:50:41.653 yield ' ' + aelt
2025-07-01 05:50:41.666
2025-07-01 05:50:41.675 # pump out diffs from after the synch point
2025-07-01 05:50:41.682 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:41.688
2025-07-01 05:50:41.695 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:41.702 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:41.713
2025-07-01 05:50:41.726 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:41.737 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:41.743 alo = 455, ahi = 1101
2025-07-01 05:50:41.749 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:41.764 blo = 455, bhi = 1101
2025-07-01 05:50:41.776
2025-07-01 05:50:41.786 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:41.794 g = []
2025-07-01 05:50:41.803 if alo < ahi:
2025-07-01 05:50:41.811 if blo < bhi:
2025-07-01 05:50:41.819 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:41.831 else:
2025-07-01 05:50:41.841 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:41.851 elif blo < bhi:
2025-07-01 05:50:41.863 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:41.872
2025-07-01 05:50:41.879 > yield from g
2025-07-01 05:50:41.887
2025-07-01 05:50:41.894 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:41.900 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:41.906
2025-07-01 05:50:41.913 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:41.924 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:41.938 alo = 455, ahi = 1101
2025-07-01 05:50:41.949 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:41.957 blo = 455, bhi = 1101
2025-07-01 05:50:41.963
2025-07-01 05:50:41.974 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:41.986 r"""
2025-07-01 05:50:41.996 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:42.004 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:42.011 synch point, and intraline difference marking is done on the
2025-07-01 05:50:42.019 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:42.025
2025-07-01 05:50:42.031 Example:
2025-07-01 05:50:42.036
2025-07-01 05:50:42.041 >>> d = Differ()
2025-07-01 05:50:42.046 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:42.052 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:42.057 >>> print(''.join(results), end="")
2025-07-01 05:50:42.063 - abcDefghiJkl
2025-07-01 05:50:42.074 + abcdefGhijkl
2025-07-01 05:50:42.085 """
2025-07-01 05:50:42.091
2025-07-01 05:50:42.098 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:42.106 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:42.116 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:42.125 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:42.133 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:42.138
2025-07-01 05:50:42.145 # search for the pair that matches best without being identical
2025-07-01 05:50:42.150 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:42.156 # on junk -- unless we have to)
2025-07-01 05:50:42.161 for j in range(blo, bhi):
2025-07-01 05:50:42.167 bj = b[j]
2025-07-01 05:50:42.174 cruncher.set_seq2(bj)
2025-07-01 05:50:42.184 for i in range(alo, ahi):
2025-07-01 05:50:42.195 ai = a[i]
2025-07-01 05:50:42.204 if ai == bj:
2025-07-01 05:50:42.211 if eqi is None:
2025-07-01 05:50:42.217 eqi, eqj = i, j
2025-07-01 05:50:42.222 continue
2025-07-01 05:50:42.232 cruncher.set_seq1(ai)
2025-07-01 05:50:42.242 # computing similarity is expensive, so use the quick
2025-07-01 05:50:42.248 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:42.254 # compares by a factor of 3.
2025-07-01 05:50:42.263 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:42.274 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:42.282 # of the computation is cached by cruncher
2025-07-01 05:50:42.289 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:42.295 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:42.300 cruncher.ratio() > best_ratio:
2025-07-01 05:50:42.306 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:42.312 if best_ratio < cutoff:
2025-07-01 05:50:42.319 # no non-identical "pretty close" pair
2025-07-01 05:50:42.326 if eqi is None:
2025-07-01 05:50:42.337 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:42.346 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:42.353 return
2025-07-01 05:50:42.359 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:42.365 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:42.371 else:
2025-07-01 05:50:42.378 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:42.385 eqi = None
2025-07-01 05:50:42.391
2025-07-01 05:50:42.399 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:42.407 # identical
2025-07-01 05:50:42.418
2025-07-01 05:50:42.426 # pump out diffs from before the synch point
2025-07-01 05:50:42.435 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:42.446
2025-07-01 05:50:42.453 # do intraline marking on the synch pair
2025-07-01 05:50:42.459 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:42.466 if eqi is None:
2025-07-01 05:50:42.477 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:42.486 atags = btags = ""
2025-07-01 05:50:42.493 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:42.499 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:42.504 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:42.509 if tag == 'replace':
2025-07-01 05:50:42.515 atags += '^' * la
2025-07-01 05:50:42.520 btags += '^' * lb
2025-07-01 05:50:42.525 elif tag == 'delete':
2025-07-01 05:50:42.532 atags += '-' * la
2025-07-01 05:50:42.539 elif tag == 'insert':
2025-07-01 05:50:42.545 btags += '+' * lb
2025-07-01 05:50:42.550 elif tag == 'equal':
2025-07-01 05:50:42.554 atags += ' ' * la
2025-07-01 05:50:42.559 btags += ' ' * lb
2025-07-01 05:50:42.564 else:
2025-07-01 05:50:42.568 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:42.573 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:42.578 else:
2025-07-01 05:50:42.583 # the synch pair is identical
2025-07-01 05:50:42.588 yield ' ' + aelt
2025-07-01 05:50:42.594
2025-07-01 05:50:42.599 # pump out diffs from after the synch point
2025-07-01 05:50:42.605 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:42.611
2025-07-01 05:50:42.619 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:42.626 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:42.633
2025-07-01 05:50:42.639 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:42.644 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:42.649 alo = 456, ahi = 1101
2025-07-01 05:50:42.655 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:42.659 blo = 456, bhi = 1101
2025-07-01 05:50:42.669
2025-07-01 05:50:42.676 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:42.683 g = []
2025-07-01 05:50:42.690 if alo < ahi:
2025-07-01 05:50:42.697 if blo < bhi:
2025-07-01 05:50:42.703 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:42.708 else:
2025-07-01 05:50:42.714 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:42.720 elif blo < bhi:
2025-07-01 05:50:42.726 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:42.732
2025-07-01 05:50:42.738 > yield from g
2025-07-01 05:50:42.744
2025-07-01 05:50:42.750 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:42.756 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:42.761
2025-07-01 05:50:42.767 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:42.774 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:42.780 alo = 456, ahi = 1101
2025-07-01 05:50:42.786 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:42.792 blo = 456, bhi = 1101
2025-07-01 05:50:42.798
2025-07-01 05:50:42.804 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:42.810 r"""
2025-07-01 05:50:42.815 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:42.821 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:42.828 synch point, and intraline difference marking is done on the
2025-07-01 05:50:42.841 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:42.852
2025-07-01 05:50:42.861 Example:
2025-07-01 05:50:42.867
2025-07-01 05:50:42.873 >>> d = Differ()
2025-07-01 05:50:42.878 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:42.884 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:42.889 >>> print(''.join(results), end="")
2025-07-01 05:50:42.895 - abcDefghiJkl
2025-07-01 05:50:42.905 + abcdefGhijkl
2025-07-01 05:50:42.914 """
2025-07-01 05:50:42.919
2025-07-01 05:50:42.925 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:42.931 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:42.938 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:42.945 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:42.952 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:42.959
2025-07-01 05:50:42.967 # search for the pair that matches best without being identical
2025-07-01 05:50:42.978 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:42.988 # on junk -- unless we have to)
2025-07-01 05:50:42.995 for j in range(blo, bhi):
2025-07-01 05:50:43.001 bj = b[j]
2025-07-01 05:50:43.007 cruncher.set_seq2(bj)
2025-07-01 05:50:43.013 for i in range(alo, ahi):
2025-07-01 05:50:43.018 ai = a[i]
2025-07-01 05:50:43.024 if ai == bj:
2025-07-01 05:50:43.029 if eqi is None:
2025-07-01 05:50:43.035 eqi, eqj = i, j
2025-07-01 05:50:43.040 continue
2025-07-01 05:50:43.048 cruncher.set_seq1(ai)
2025-07-01 05:50:43.060 # computing similarity is expensive, so use the quick
2025-07-01 05:50:43.068 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:43.075 # compares by a factor of 3.
2025-07-01 05:50:43.082 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:43.088 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:43.094 # of the computation is cached by cruncher
2025-07-01 05:50:43.100 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:43.106 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:43.113 cruncher.ratio() > best_ratio:
2025-07-01 05:50:43.120 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:43.127 if best_ratio < cutoff:
2025-07-01 05:50:43.135 # no non-identical "pretty close" pair
2025-07-01 05:50:43.145 if eqi is None:
2025-07-01 05:50:43.154 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:43.160 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:43.166 return
2025-07-01 05:50:43.171 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:43.176 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:43.181 else:
2025-07-01 05:50:43.186 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:43.191 eqi = None
2025-07-01 05:50:43.196
2025-07-01 05:50:43.202 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:43.209 # identical
2025-07-01 05:50:43.215
2025-07-01 05:50:43.222 # pump out diffs from before the synch point
2025-07-01 05:50:43.229 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:43.235
2025-07-01 05:50:43.242 # do intraline marking on the synch pair
2025-07-01 05:50:43.252 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:43.262 if eqi is None:
2025-07-01 05:50:43.270 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:43.278 atags = btags = ""
2025-07-01 05:50:43.289 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:43.299 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:43.308 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:43.315 if tag == 'replace':
2025-07-01 05:50:43.321 atags += '^' * la
2025-07-01 05:50:43.327 btags += '^' * lb
2025-07-01 05:50:43.332 elif tag == 'delete':
2025-07-01 05:50:43.338 atags += '-' * la
2025-07-01 05:50:43.348 elif tag == 'insert':
2025-07-01 05:50:43.358 btags += '+' * lb
2025-07-01 05:50:43.365 elif tag == 'equal':
2025-07-01 05:50:43.370 atags += ' ' * la
2025-07-01 05:50:43.376 btags += ' ' * lb
2025-07-01 05:50:43.383 else:
2025-07-01 05:50:43.394 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:43.401 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:43.407 else:
2025-07-01 05:50:43.413 # the synch pair is identical
2025-07-01 05:50:43.420 yield ' ' + aelt
2025-07-01 05:50:43.426
2025-07-01 05:50:43.436 # pump out diffs from after the synch point
2025-07-01 05:50:43.447 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:43.454
2025-07-01 05:50:43.460 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:43.472 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:43.484
2025-07-01 05:50:43.495 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:43.504 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:43.511 alo = 457, ahi = 1101
2025-07-01 05:50:43.518 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:43.523 blo = 457, bhi = 1101
2025-07-01 05:50:43.532
2025-07-01 05:50:43.543 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:43.552 g = []
2025-07-01 05:50:43.561 if alo < ahi:
2025-07-01 05:50:43.569 if blo < bhi:
2025-07-01 05:50:43.575 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:43.582 else:
2025-07-01 05:50:43.590 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:43.599 elif blo < bhi:
2025-07-01 05:50:43.607 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:43.614
2025-07-01 05:50:43.622 > yield from g
2025-07-01 05:50:43.633
2025-07-01 05:50:43.641 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:43.648 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:43.655
2025-07-01 05:50:43.662 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:43.673 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:43.685 alo = 457, ahi = 1101
2025-07-01 05:50:43.698 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:43.707 blo = 457, bhi = 1101
2025-07-01 05:50:43.714
2025-07-01 05:50:43.725 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:43.738 r"""
2025-07-01 05:50:43.749 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:43.755 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:43.760 synch point, and intraline difference marking is done on the
2025-07-01 05:50:43.767 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:43.773
2025-07-01 05:50:43.779 Example:
2025-07-01 05:50:43.785
2025-07-01 05:50:43.790 >>> d = Differ()
2025-07-01 05:50:43.800 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:43.809 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:43.815 >>> print(''.join(results), end="")
2025-07-01 05:50:43.820 - abcDefghiJkl
2025-07-01 05:50:43.832 + abcdefGhijkl
2025-07-01 05:50:43.843 """
2025-07-01 05:50:43.849
2025-07-01 05:50:43.853 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:43.858 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:43.863 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:43.868 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:43.873 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:43.879
2025-07-01 05:50:43.885 # search for the pair that matches best without being identical
2025-07-01 05:50:43.891 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:43.899 # on junk -- unless we have to)
2025-07-01 05:50:43.908 for j in range(blo, bhi):
2025-07-01 05:50:43.914 bj = b[j]
2025-07-01 05:50:43.920 cruncher.set_seq2(bj)
2025-07-01 05:50:43.925 for i in range(alo, ahi):
2025-07-01 05:50:43.934 ai = a[i]
2025-07-01 05:50:43.940 if ai == bj:
2025-07-01 05:50:43.947 if eqi is None:
2025-07-01 05:50:43.953 eqi, eqj = i, j
2025-07-01 05:50:43.959 continue
2025-07-01 05:50:43.964 cruncher.set_seq1(ai)
2025-07-01 05:50:43.970 # computing similarity is expensive, so use the quick
2025-07-01 05:50:43.977 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:43.986 # compares by a factor of 3.
2025-07-01 05:50:43.993 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:44.000 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:44.007 # of the computation is cached by cruncher
2025-07-01 05:50:44.012 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:44.018 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:44.023 cruncher.ratio() > best_ratio:
2025-07-01 05:50:44.029 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:44.036 if best_ratio < cutoff:
2025-07-01 05:50:44.043 # no non-identical "pretty close" pair
2025-07-01 05:50:44.050 if eqi is None:
2025-07-01 05:50:44.060 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:44.070 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:44.077 return
2025-07-01 05:50:44.083 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:44.089 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:44.095 else:
2025-07-01 05:50:44.103 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:44.113 eqi = None
2025-07-01 05:50:44.124
2025-07-01 05:50:44.132 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:44.139 # identical
2025-07-01 05:50:44.144
2025-07-01 05:50:44.157 # pump out diffs from before the synch point
2025-07-01 05:50:44.170 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:44.181
2025-07-01 05:50:44.194 # do intraline marking on the synch pair
2025-07-01 05:50:44.205 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:44.216 if eqi is None:
2025-07-01 05:50:44.226 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:44.235 atags = btags = ""
2025-07-01 05:50:44.245 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:44.255 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:44.263 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:44.272 if tag == 'replace':
2025-07-01 05:50:44.283 atags += '^' * la
2025-07-01 05:50:44.290 btags += '^' * lb
2025-07-01 05:50:44.297 elif tag == 'delete':
2025-07-01 05:50:44.303 atags += '-' * la
2025-07-01 05:50:44.311 elif tag == 'insert':
2025-07-01 05:50:44.321 btags += '+' * lb
2025-07-01 05:50:44.329 elif tag == 'equal':
2025-07-01 05:50:44.342 atags += ' ' * la
2025-07-01 05:50:44.352 btags += ' ' * lb
2025-07-01 05:50:44.360 else:
2025-07-01 05:50:44.367 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:44.373 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:44.384 else:
2025-07-01 05:50:44.393 # the synch pair is identical
2025-07-01 05:50:44.402 yield ' ' + aelt
2025-07-01 05:50:44.413
2025-07-01 05:50:44.423 # pump out diffs from after the synch point
2025-07-01 05:50:44.435 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:44.443
2025-07-01 05:50:44.449 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:44.460 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:44.467
2025-07-01 05:50:44.480 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:44.491 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:44.502 alo = 458, ahi = 1101
2025-07-01 05:50:44.513 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:44.524 blo = 458, bhi = 1101
2025-07-01 05:50:44.534
2025-07-01 05:50:44.546 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:44.556 g = []
2025-07-01 05:50:44.564 if alo < ahi:
2025-07-01 05:50:44.571 if blo < bhi:
2025-07-01 05:50:44.582 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:44.592 else:
2025-07-01 05:50:44.601 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:44.608 elif blo < bhi:
2025-07-01 05:50:44.615 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:44.621
2025-07-01 05:50:44.629 > yield from g
2025-07-01 05:50:44.635
2025-07-01 05:50:44.642 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:44.652 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:44.660
2025-07-01 05:50:44.667 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:44.673 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:44.678 alo = 458, ahi = 1101
2025-07-01 05:50:44.684 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:44.691 blo = 458, bhi = 1101
2025-07-01 05:50:44.701
2025-07-01 05:50:44.709 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:44.716 r"""
2025-07-01 05:50:44.722 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:44.735 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:44.746 synch point, and intraline difference marking is done on the
2025-07-01 05:50:44.759 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:44.768
2025-07-01 05:50:44.780 Example:
2025-07-01 05:50:44.790
2025-07-01 05:50:44.798 >>> d = Differ()
2025-07-01 05:50:44.807 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:44.814 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:44.820 >>> print(''.join(results), end="")
2025-07-01 05:50:44.826 - abcDefghiJkl
2025-07-01 05:50:44.837 + abcdefGhijkl
2025-07-01 05:50:44.851 """
2025-07-01 05:50:44.863
2025-07-01 05:50:44.872 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:44.879 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:44.887 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:44.895 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:44.904 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:44.911
2025-07-01 05:50:44.918 # search for the pair that matches best without being identical
2025-07-01 05:50:44.923 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:44.929 # on junk -- unless we have to)
2025-07-01 05:50:44.934 for j in range(blo, bhi):
2025-07-01 05:50:44.940 bj = b[j]
2025-07-01 05:50:44.945 cruncher.set_seq2(bj)
2025-07-01 05:50:44.952 for i in range(alo, ahi):
2025-07-01 05:50:44.958 ai = a[i]
2025-07-01 05:50:44.966 if ai == bj:
2025-07-01 05:50:44.974 if eqi is None:
2025-07-01 05:50:44.980 eqi, eqj = i, j
2025-07-01 05:50:44.986 continue
2025-07-01 05:50:44.992 cruncher.set_seq1(ai)
2025-07-01 05:50:44.999 # computing similarity is expensive, so use the quick
2025-07-01 05:50:45.011 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:45.022 # compares by a factor of 3.
2025-07-01 05:50:45.034 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:45.044 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:45.053 # of the computation is cached by cruncher
2025-07-01 05:50:45.062 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:45.069 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:45.075 cruncher.ratio() > best_ratio:
2025-07-01 05:50:45.081 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:45.087 if best_ratio < cutoff:
2025-07-01 05:50:45.093 # no non-identical "pretty close" pair
2025-07-01 05:50:45.099 if eqi is None:
2025-07-01 05:50:45.105 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:45.111 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:45.116 return
2025-07-01 05:50:45.126 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:45.135 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:45.143 else:
2025-07-01 05:50:45.150 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:45.161 eqi = None
2025-07-01 05:50:45.168
2025-07-01 05:50:45.175 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:45.181 # identical
2025-07-01 05:50:45.186
2025-07-01 05:50:45.192 # pump out diffs from before the synch point
2025-07-01 05:50:45.198 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:45.203
2025-07-01 05:50:45.212 # do intraline marking on the synch pair
2025-07-01 05:50:45.224 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:45.233 if eqi is None:
2025-07-01 05:50:45.239 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:45.247 atags = btags = ""
2025-07-01 05:50:45.256 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:45.265 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:45.273 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:45.280 if tag == 'replace':
2025-07-01 05:50:45.286 atags += '^' * la
2025-07-01 05:50:45.291 btags += '^' * lb
2025-07-01 05:50:45.297 elif tag == 'delete':
2025-07-01 05:50:45.303 atags += '-' * la
2025-07-01 05:50:45.310 elif tag == 'insert':
2025-07-01 05:50:45.321 btags += '+' * lb
2025-07-01 05:50:45.330 elif tag == 'equal':
2025-07-01 05:50:45.340 atags += ' ' * la
2025-07-01 05:50:45.351 btags += ' ' * lb
2025-07-01 05:50:45.363 else:
2025-07-01 05:50:45.372 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:45.381 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:45.389 else:
2025-07-01 05:50:45.396 # the synch pair is identical
2025-07-01 05:50:45.402 yield ' ' + aelt
2025-07-01 05:50:45.411
2025-07-01 05:50:45.418 # pump out diffs from after the synch point
2025-07-01 05:50:45.428 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:45.439
2025-07-01 05:50:45.447 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:45.454 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:45.461
2025-07-01 05:50:45.467 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:45.475 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:45.487 alo = 459, ahi = 1101
2025-07-01 05:50:45.497 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:45.509 blo = 459, bhi = 1101
2025-07-01 05:50:45.520
2025-07-01 05:50:45.528 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:45.535 g = []
2025-07-01 05:50:45.541 if alo < ahi:
2025-07-01 05:50:45.548 if blo < bhi:
2025-07-01 05:50:45.554 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:45.565 else:
2025-07-01 05:50:45.573 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:45.580 elif blo < bhi:
2025-07-01 05:50:45.587 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:45.595
2025-07-01 05:50:45.606 > yield from g
2025-07-01 05:50:45.617
2025-07-01 05:50:45.627 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:45.639 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:45.648
2025-07-01 05:50:45.659 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:45.669 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:45.681 alo = 459, ahi = 1101
2025-07-01 05:50:45.693 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:45.705 blo = 459, bhi = 1101
2025-07-01 05:50:45.714
2025-07-01 05:50:45.723 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:45.730 r"""
2025-07-01 05:50:45.740 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:45.753 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:45.764 synch point, and intraline difference marking is done on the
2025-07-01 05:50:45.772 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:45.780
2025-07-01 05:50:45.786 Example:
2025-07-01 05:50:45.792
2025-07-01 05:50:45.798 >>> d = Differ()
2025-07-01 05:50:45.805 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:45.819 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:45.828 >>> print(''.join(results), end="")
2025-07-01 05:50:45.836 - abcDefghiJkl
2025-07-01 05:50:45.850 + abcdefGhijkl
2025-07-01 05:50:45.871 """
2025-07-01 05:50:45.883
2025-07-01 05:50:45.894 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:45.906 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:45.919 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:45.930 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:45.939 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:45.950
2025-07-01 05:50:45.960 # search for the pair that matches best without being identical
2025-07-01 05:50:45.970 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:45.977 # on junk -- unless we have to)
2025-07-01 05:50:45.988 for j in range(blo, bhi):
2025-07-01 05:50:45.998 bj = b[j]
2025-07-01 05:50:46.007 cruncher.set_seq2(bj)
2025-07-01 05:50:46.018 for i in range(alo, ahi):
2025-07-01 05:50:46.031 ai = a[i]
2025-07-01 05:50:46.040 if ai == bj:
2025-07-01 05:50:46.048 if eqi is None:
2025-07-01 05:50:46.054 eqi, eqj = i, j
2025-07-01 05:50:46.062 continue
2025-07-01 05:50:46.073 cruncher.set_seq1(ai)
2025-07-01 05:50:46.086 # computing similarity is expensive, so use the quick
2025-07-01 05:50:46.099 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:46.110 # compares by a factor of 3.
2025-07-01 05:50:46.123 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:46.133 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:46.143 # of the computation is cached by cruncher
2025-07-01 05:50:46.152 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:46.163 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:46.175 cruncher.ratio() > best_ratio:
2025-07-01 05:50:46.185 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:46.197 if best_ratio < cutoff:
2025-07-01 05:50:46.207 # no non-identical "pretty close" pair
2025-07-01 05:50:46.217 if eqi is None:
2025-07-01 05:50:46.224 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:46.231 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:46.238 return
2025-07-01 05:50:46.245 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:46.252 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:46.258 else:
2025-07-01 05:50:46.265 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:46.279 eqi = None
2025-07-01 05:50:46.290
2025-07-01 05:50:46.302 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:46.316 # identical
2025-07-01 05:50:46.329
2025-07-01 05:50:46.342 # pump out diffs from before the synch point
2025-07-01 05:50:46.355 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:46.364
2025-07-01 05:50:46.371 # do intraline marking on the synch pair
2025-07-01 05:50:46.378 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:46.387 if eqi is None:
2025-07-01 05:50:46.397 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:46.405 atags = btags = ""
2025-07-01 05:50:46.411 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:46.419 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:46.429 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:46.439 if tag == 'replace':
2025-07-01 05:50:46.450 atags += '^' * la
2025-07-01 05:50:46.459 btags += '^' * lb
2025-07-01 05:50:46.468 elif tag == 'delete':
2025-07-01 05:50:46.475 atags += '-' * la
2025-07-01 05:50:46.482 elif tag == 'insert':
2025-07-01 05:50:46.492 btags += '+' * lb
2025-07-01 05:50:46.501 elif tag == 'equal':
2025-07-01 05:50:46.509 atags += ' ' * la
2025-07-01 05:50:46.516 btags += ' ' * lb
2025-07-01 05:50:46.522 else:
2025-07-01 05:50:46.533 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:46.542 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:46.550 else:
2025-07-01 05:50:46.560 # the synch pair is identical
2025-07-01 05:50:46.571 yield ' ' + aelt
2025-07-01 05:50:46.581
2025-07-01 05:50:46.589 # pump out diffs from after the synch point
2025-07-01 05:50:46.595 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:46.602
2025-07-01 05:50:46.608 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:46.615 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:46.623
2025-07-01 05:50:46.631 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:46.642 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:46.652 alo = 460, ahi = 1101
2025-07-01 05:50:46.660 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:46.666 blo = 460, bhi = 1101
2025-07-01 05:50:46.671
2025-07-01 05:50:46.677 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:46.683 g = []
2025-07-01 05:50:46.689 if alo < ahi:
2025-07-01 05:50:46.695 if blo < bhi:
2025-07-01 05:50:46.702 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:46.714 else:
2025-07-01 05:50:46.724 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:46.732 elif blo < bhi:
2025-07-01 05:50:46.739 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:46.746
2025-07-01 05:50:46.752 > yield from g
2025-07-01 05:50:46.757
2025-07-01 05:50:46.763 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:46.769 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:46.775
2025-07-01 05:50:46.780 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:46.786 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:46.793 alo = 460, ahi = 1101
2025-07-01 05:50:46.801 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:46.807 blo = 460, bhi = 1101
2025-07-01 05:50:46.814
2025-07-01 05:50:46.823 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:46.830 r"""
2025-07-01 05:50:46.837 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:46.849 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:46.857 synch point, and intraline difference marking is done on the
2025-07-01 05:50:46.863 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:46.870
2025-07-01 05:50:46.876 Example:
2025-07-01 05:50:46.884
2025-07-01 05:50:46.891 >>> d = Differ()
2025-07-01 05:50:46.898 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:46.906 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:46.916 >>> print(''.join(results), end="")
2025-07-01 05:50:46.926 - abcDefghiJkl
2025-07-01 05:50:46.941 + abcdefGhijkl
2025-07-01 05:50:46.955 """
2025-07-01 05:50:46.962
2025-07-01 05:50:46.971 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:46.980 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:46.988 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:46.994 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:46.999 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:47.004
2025-07-01 05:50:47.010 # search for the pair that matches best without being identical
2025-07-01 05:50:47.016 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:47.022 # on junk -- unless we have to)
2025-07-01 05:50:47.028 for j in range(blo, bhi):
2025-07-01 05:50:47.034 bj = b[j]
2025-07-01 05:50:47.043 cruncher.set_seq2(bj)
2025-07-01 05:50:47.054 for i in range(alo, ahi):
2025-07-01 05:50:47.063 ai = a[i]
2025-07-01 05:50:47.069 if ai == bj:
2025-07-01 05:50:47.075 if eqi is None:
2025-07-01 05:50:47.082 eqi, eqj = i, j
2025-07-01 05:50:47.088 continue
2025-07-01 05:50:47.095 cruncher.set_seq1(ai)
2025-07-01 05:50:47.103 # computing similarity is expensive, so use the quick
2025-07-01 05:50:47.110 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:47.118 # compares by a factor of 3.
2025-07-01 05:50:47.126 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:47.137 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:47.144 # of the computation is cached by cruncher
2025-07-01 05:50:47.153 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:47.165 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:47.177 cruncher.ratio() > best_ratio:
2025-07-01 05:50:47.188 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:47.196 if best_ratio < cutoff:
2025-07-01 05:50:47.202 # no non-identical "pretty close" pair
2025-07-01 05:50:47.208 if eqi is None:
2025-07-01 05:50:47.214 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:47.221 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:47.228 return
2025-07-01 05:50:47.235 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:47.243 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:47.251 else:
2025-07-01 05:50:47.263 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:47.272 eqi = None
2025-07-01 05:50:47.283
2025-07-01 05:50:47.290 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:47.297 # identical
2025-07-01 05:50:47.304
2025-07-01 05:50:47.312 # pump out diffs from before the synch point
2025-07-01 05:50:47.319 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:47.326
2025-07-01 05:50:47.332 # do intraline marking on the synch pair
2025-07-01 05:50:47.338 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:47.344 if eqi is None:
2025-07-01 05:50:47.349 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:47.354 atags = btags = ""
2025-07-01 05:50:47.360 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:47.366 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:47.372 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:47.379 if tag == 'replace':
2025-07-01 05:50:47.387 atags += '^' * la
2025-07-01 05:50:47.399 btags += '^' * lb
2025-07-01 05:50:47.410 elif tag == 'delete':
2025-07-01 05:50:47.417 atags += '-' * la
2025-07-01 05:50:47.423 elif tag == 'insert':
2025-07-01 05:50:47.430 btags += '+' * lb
2025-07-01 05:50:47.440 elif tag == 'equal':
2025-07-01 05:50:47.451 atags += ' ' * la
2025-07-01 05:50:47.463 btags += ' ' * lb
2025-07-01 05:50:47.472 else:
2025-07-01 05:50:47.479 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:47.487 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:47.495 else:
2025-07-01 05:50:47.506 # the synch pair is identical
2025-07-01 05:50:47.518 yield ' ' + aelt
2025-07-01 05:50:47.528
2025-07-01 05:50:47.536 # pump out diffs from after the synch point
2025-07-01 05:50:47.543 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:47.551
2025-07-01 05:50:47.560 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:47.570 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:47.578
2025-07-01 05:50:47.584 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:47.590 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:47.596 alo = 461, ahi = 1101
2025-07-01 05:50:47.602 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:47.611 blo = 461, bhi = 1101
2025-07-01 05:50:47.621
2025-07-01 05:50:47.629 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:47.635 g = []
2025-07-01 05:50:47.641 if alo < ahi:
2025-07-01 05:50:47.647 if blo < bhi:
2025-07-01 05:50:47.652 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:47.658 else:
2025-07-01 05:50:47.666 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:47.673 elif blo < bhi:
2025-07-01 05:50:47.680 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:47.685
2025-07-01 05:50:47.694 > yield from g
2025-07-01 05:50:47.705
2025-07-01 05:50:47.713 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:47.719 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:47.724
2025-07-01 05:50:47.732 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:47.743 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:47.753 alo = 461, ahi = 1101
2025-07-01 05:50:47.764 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:47.772 blo = 461, bhi = 1101
2025-07-01 05:50:47.780
2025-07-01 05:50:47.791 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:47.797 r"""
2025-07-01 05:50:47.804 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:47.811 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:47.816 synch point, and intraline difference marking is done on the
2025-07-01 05:50:47.821 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:47.827
2025-07-01 05:50:47.831 Example:
2025-07-01 05:50:47.836
2025-07-01 05:50:47.840 >>> d = Differ()
2025-07-01 05:50:47.846 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:47.851 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:47.855 >>> print(''.join(results), end="")
2025-07-01 05:50:47.860 - abcDefghiJkl
2025-07-01 05:50:47.873 + abcdefGhijkl
2025-07-01 05:50:47.893 """
2025-07-01 05:50:47.899
2025-07-01 05:50:47.904 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:47.909 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:47.917 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:47.924 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:47.931 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:47.936
2025-07-01 05:50:47.942 # search for the pair that matches best without being identical
2025-07-01 05:50:47.951 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:47.960 # on junk -- unless we have to)
2025-07-01 05:50:47.967 for j in range(blo, bhi):
2025-07-01 05:50:47.974 bj = b[j]
2025-07-01 05:50:47.984 cruncher.set_seq2(bj)
2025-07-01 05:50:47.994 for i in range(alo, ahi):
2025-07-01 05:50:48.002 ai = a[i]
2025-07-01 05:50:48.008 if ai == bj:
2025-07-01 05:50:48.015 if eqi is None:
2025-07-01 05:50:48.022 eqi, eqj = i, j
2025-07-01 05:50:48.028 continue
2025-07-01 05:50:48.034 cruncher.set_seq1(ai)
2025-07-01 05:50:48.041 # computing similarity is expensive, so use the quick
2025-07-01 05:50:48.048 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:48.055 # compares by a factor of 3.
2025-07-01 05:50:48.063 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:48.072 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:48.083 # of the computation is cached by cruncher
2025-07-01 05:50:48.092 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:48.099 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:48.105 cruncher.ratio() > best_ratio:
2025-07-01 05:50:48.111 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:48.122 if best_ratio < cutoff:
2025-07-01 05:50:48.129 # no non-identical "pretty close" pair
2025-07-01 05:50:48.135 if eqi is None:
2025-07-01 05:50:48.140 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:48.146 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:48.158 return
2025-07-01 05:50:48.165 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:48.170 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:48.175 else:
2025-07-01 05:50:48.181 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:48.185 eqi = None
2025-07-01 05:50:48.190
2025-07-01 05:50:48.196 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:48.202 # identical
2025-07-01 05:50:48.209
2025-07-01 05:50:48.217 # pump out diffs from before the synch point
2025-07-01 05:50:48.223 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:48.228
2025-07-01 05:50:48.234 # do intraline marking on the synch pair
2025-07-01 05:50:48.239 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:48.246 if eqi is None:
2025-07-01 05:50:48.255 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:48.266 atags = btags = ""
2025-07-01 05:50:48.275 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:48.283 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:48.291 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:48.303 if tag == 'replace':
2025-07-01 05:50:48.312 atags += '^' * la
2025-07-01 05:50:48.321 btags += '^' * lb
2025-07-01 05:50:48.328 elif tag == 'delete':
2025-07-01 05:50:48.337 atags += '-' * la
2025-07-01 05:50:48.349 elif tag == 'insert':
2025-07-01 05:50:48.361 btags += '+' * lb
2025-07-01 05:50:48.371 elif tag == 'equal':
2025-07-01 05:50:48.378 atags += ' ' * la
2025-07-01 05:50:48.387 btags += ' ' * lb
2025-07-01 05:50:48.397 else:
2025-07-01 05:50:48.406 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:48.411 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:48.417 else:
2025-07-01 05:50:48.423 # the synch pair is identical
2025-07-01 05:50:48.429 yield ' ' + aelt
2025-07-01 05:50:48.434
2025-07-01 05:50:48.440 # pump out diffs from after the synch point
2025-07-01 05:50:48.446 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:48.452
2025-07-01 05:50:48.457 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:48.463 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:48.469
2025-07-01 05:50:48.478 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:48.491 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:48.503 alo = 462, ahi = 1101
2025-07-01 05:50:48.514 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:48.522 blo = 462, bhi = 1101
2025-07-01 05:50:48.528
2025-07-01 05:50:48.534 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:48.539 g = []
2025-07-01 05:50:48.544 if alo < ahi:
2025-07-01 05:50:48.550 if blo < bhi:
2025-07-01 05:50:48.561 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:48.570 else:
2025-07-01 05:50:48.577 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:48.582 elif blo < bhi:
2025-07-01 05:50:48.589 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:48.594
2025-07-01 05:50:48.600 > yield from g
2025-07-01 05:50:48.605
2025-07-01 05:50:48.611 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:48.624 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:48.633
2025-07-01 05:50:48.645 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:48.657 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:48.664 alo = 462, ahi = 1101
2025-07-01 05:50:48.672 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:48.679 blo = 462, bhi = 1101
2025-07-01 05:50:48.686
2025-07-01 05:50:48.693 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:48.700 r"""
2025-07-01 05:50:48.707 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:48.714 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:48.726 synch point, and intraline difference marking is done on the
2025-07-01 05:50:48.739 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:48.751
2025-07-01 05:50:48.759 Example:
2025-07-01 05:50:48.765
2025-07-01 05:50:48.770 >>> d = Differ()
2025-07-01 05:50:48.775 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:48.780 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:48.785 >>> print(''.join(results), end="")
2025-07-01 05:50:48.790 - abcDefghiJkl
2025-07-01 05:50:48.801 + abcdefGhijkl
2025-07-01 05:50:48.814 """
2025-07-01 05:50:48.822
2025-07-01 05:50:48.830 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:48.839 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:48.849 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:48.857 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:48.863 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:48.870
2025-07-01 05:50:48.877 # search for the pair that matches best without being identical
2025-07-01 05:50:48.890 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:48.903 # on junk -- unless we have to)
2025-07-01 05:50:48.913 for j in range(blo, bhi):
2025-07-01 05:50:48.920 bj = b[j]
2025-07-01 05:50:48.926 cruncher.set_seq2(bj)
2025-07-01 05:50:48.932 for i in range(alo, ahi):
2025-07-01 05:50:48.937 ai = a[i]
2025-07-01 05:50:48.943 if ai == bj:
2025-07-01 05:50:48.948 if eqi is None:
2025-07-01 05:50:48.954 eqi, eqj = i, j
2025-07-01 05:50:48.964 continue
2025-07-01 05:50:48.974 cruncher.set_seq1(ai)
2025-07-01 05:50:48.982 # computing similarity is expensive, so use the quick
2025-07-01 05:50:48.993 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:49.001 # compares by a factor of 3.
2025-07-01 05:50:49.013 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:49.020 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:49.026 # of the computation is cached by cruncher
2025-07-01 05:50:49.033 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:49.040 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:49.047 cruncher.ratio() > best_ratio:
2025-07-01 05:50:49.054 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:49.065 if best_ratio < cutoff:
2025-07-01 05:50:49.076 # no non-identical "pretty close" pair
2025-07-01 05:50:49.085 if eqi is None:
2025-07-01 05:50:49.092 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:49.098 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:49.104 return
2025-07-01 05:50:49.110 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:49.116 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:49.122 else:
2025-07-01 05:50:49.129 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:49.137 eqi = None
2025-07-01 05:50:49.145
2025-07-01 05:50:49.151 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:49.157 # identical
2025-07-01 05:50:49.163
2025-07-01 05:50:49.171 # pump out diffs from before the synch point
2025-07-01 05:50:49.183 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:49.191
2025-07-01 05:50:49.199 # do intraline marking on the synch pair
2025-07-01 05:50:49.208 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:49.214 if eqi is None:
2025-07-01 05:50:49.223 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:49.233 atags = btags = ""
2025-07-01 05:50:49.241 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:49.248 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:49.255 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:49.260 if tag == 'replace':
2025-07-01 05:50:49.267 atags += '^' * la
2025-07-01 05:50:49.278 btags += '^' * lb
2025-07-01 05:50:49.285 elif tag == 'delete':
2025-07-01 05:50:49.292 atags += '-' * la
2025-07-01 05:50:49.300 elif tag == 'insert':
2025-07-01 05:50:49.307 btags += '+' * lb
2025-07-01 05:50:49.314 elif tag == 'equal':
2025-07-01 05:50:49.322 atags += ' ' * la
2025-07-01 05:50:49.330 btags += ' ' * lb
2025-07-01 05:50:49.337 else:
2025-07-01 05:50:49.342 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:49.346 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:49.352 else:
2025-07-01 05:50:49.358 # the synch pair is identical
2025-07-01 05:50:49.365 yield ' ' + aelt
2025-07-01 05:50:49.373
2025-07-01 05:50:49.380 # pump out diffs from after the synch point
2025-07-01 05:50:49.387 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:49.393
2025-07-01 05:50:49.399 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:49.405 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:49.410
2025-07-01 05:50:49.416 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:49.430 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:49.442 alo = 463, ahi = 1101
2025-07-01 05:50:49.452 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:49.459 blo = 463, bhi = 1101
2025-07-01 05:50:49.468
2025-07-01 05:50:49.478 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:49.485 g = []
2025-07-01 05:50:49.492 if alo < ahi:
2025-07-01 05:50:49.498 if blo < bhi:
2025-07-01 05:50:49.507 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:49.515 else:
2025-07-01 05:50:49.522 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:49.528 elif blo < bhi:
2025-07-01 05:50:49.535 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:49.540
2025-07-01 05:50:49.547 > yield from g
2025-07-01 05:50:49.555
2025-07-01 05:50:49.564 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:49.572 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:49.578
2025-07-01 05:50:49.584 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:49.590 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:49.594 alo = 463, ahi = 1101
2025-07-01 05:50:49.600 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:49.605 blo = 463, bhi = 1101
2025-07-01 05:50:49.611
2025-07-01 05:50:49.617 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:49.622 r"""
2025-07-01 05:50:49.627 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:49.639 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:49.648 synch point, and intraline difference marking is done on the
2025-07-01 05:50:49.657 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:49.665
2025-07-01 05:50:49.671 Example:
2025-07-01 05:50:49.677
2025-07-01 05:50:49.683 >>> d = Differ()
2025-07-01 05:50:49.689 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:49.695 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:49.701 >>> print(''.join(results), end="")
2025-07-01 05:50:49.708 - abcDefghiJkl
2025-07-01 05:50:49.721 + abcdefGhijkl
2025-07-01 05:50:49.734 """
2025-07-01 05:50:49.740
2025-07-01 05:50:49.746 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:49.752 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:49.758 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:49.763 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:49.770 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:49.776
2025-07-01 05:50:49.782 # search for the pair that matches best without being identical
2025-07-01 05:50:49.788 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:49.794 # on junk -- unless we have to)
2025-07-01 05:50:49.800 for j in range(blo, bhi):
2025-07-01 05:50:49.806 bj = b[j]
2025-07-01 05:50:49.811 cruncher.set_seq2(bj)
2025-07-01 05:50:49.817 for i in range(alo, ahi):
2025-07-01 05:50:49.823 ai = a[i]
2025-07-01 05:50:49.829 if ai == bj:
2025-07-01 05:50:49.835 if eqi is None:
2025-07-01 05:50:49.841 eqi, eqj = i, j
2025-07-01 05:50:49.846 continue
2025-07-01 05:50:49.853 cruncher.set_seq1(ai)
2025-07-01 05:50:49.859 # computing similarity is expensive, so use the quick
2025-07-01 05:50:49.864 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:49.871 # compares by a factor of 3.
2025-07-01 05:50:49.877 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:49.883 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:49.889 # of the computation is cached by cruncher
2025-07-01 05:50:49.895 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:49.901 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:49.907 cruncher.ratio() > best_ratio:
2025-07-01 05:50:49.916 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:49.929 if best_ratio < cutoff:
2025-07-01 05:50:49.937 # no non-identical "pretty close" pair
2025-07-01 05:50:49.949 if eqi is None:
2025-07-01 05:50:49.962 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:49.975 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:49.986 return
2025-07-01 05:50:49.995 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:50.003 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:50.012 else:
2025-07-01 05:50:50.021 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:50.028 eqi = None
2025-07-01 05:50:50.034
2025-07-01 05:50:50.040 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:50.046 # identical
2025-07-01 05:50:50.052
2025-07-01 05:50:50.058 # pump out diffs from before the synch point
2025-07-01 05:50:50.067 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:50.074
2025-07-01 05:50:50.082 # do intraline marking on the synch pair
2025-07-01 05:50:50.087 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:50.093 if eqi is None:
2025-07-01 05:50:50.098 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:50.103 atags = btags = ""
2025-07-01 05:50:50.110 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:50.120 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:50.132 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:50.141 if tag == 'replace':
2025-07-01 05:50:50.149 atags += '^' * la
2025-07-01 05:50:50.156 btags += '^' * lb
2025-07-01 05:50:50.162 elif tag == 'delete':
2025-07-01 05:50:50.168 atags += '-' * la
2025-07-01 05:50:50.174 elif tag == 'insert':
2025-07-01 05:50:50.185 btags += '+' * lb
2025-07-01 05:50:50.195 elif tag == 'equal':
2025-07-01 05:50:50.207 atags += ' ' * la
2025-07-01 05:50:50.216 btags += ' ' * lb
2025-07-01 05:50:50.224 else:
2025-07-01 05:50:50.231 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:50.238 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:50.244 else:
2025-07-01 05:50:50.250 # the synch pair is identical
2025-07-01 05:50:50.256 yield ' ' + aelt
2025-07-01 05:50:50.261
2025-07-01 05:50:50.268 # pump out diffs from after the synch point
2025-07-01 05:50:50.279 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:50.286
2025-07-01 05:50:50.294 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:50.302 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:50.315
2025-07-01 05:50:50.326 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:50.335 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:50.343 alo = 466, ahi = 1101
2025-07-01 05:50:50.351 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:50.358 blo = 466, bhi = 1101
2025-07-01 05:50:50.370
2025-07-01 05:50:50.379 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:50.387 g = []
2025-07-01 05:50:50.395 if alo < ahi:
2025-07-01 05:50:50.402 if blo < bhi:
2025-07-01 05:50:50.408 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:50.414 else:
2025-07-01 05:50:50.418 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:50.424 elif blo < bhi:
2025-07-01 05:50:50.433 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:50.443
2025-07-01 05:50:50.455 > yield from g
2025-07-01 05:50:50.465
2025-07-01 05:50:50.472 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:50.477 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:50.483
2025-07-01 05:50:50.490 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:50.497 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:50.503 alo = 466, ahi = 1101
2025-07-01 05:50:50.510 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:50.515 blo = 466, bhi = 1101
2025-07-01 05:50:50.522
2025-07-01 05:50:50.528 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:50.534 r"""
2025-07-01 05:50:50.541 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:50.552 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:50.562 synch point, and intraline difference marking is done on the
2025-07-01 05:50:50.568 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:50.575
2025-07-01 05:50:50.581 Example:
2025-07-01 05:50:50.588
2025-07-01 05:50:50.594 >>> d = Differ()
2025-07-01 05:50:50.603 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:50.610 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:50.616 >>> print(''.join(results), end="")
2025-07-01 05:50:50.622 - abcDefghiJkl
2025-07-01 05:50:50.635 + abcdefGhijkl
2025-07-01 05:50:50.651 """
2025-07-01 05:50:50.659
2025-07-01 05:50:50.667 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:50.674 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:50.681 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:50.688 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:50.694 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:50.700
2025-07-01 05:50:50.706 # search for the pair that matches best without being identical
2025-07-01 05:50:50.712 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:50.719 # on junk -- unless we have to)
2025-07-01 05:50:50.726 for j in range(blo, bhi):
2025-07-01 05:50:50.739 bj = b[j]
2025-07-01 05:50:50.747 cruncher.set_seq2(bj)
2025-07-01 05:50:50.757 for i in range(alo, ahi):
2025-07-01 05:50:50.764 ai = a[i]
2025-07-01 05:50:50.769 if ai == bj:
2025-07-01 05:50:50.775 if eqi is None:
2025-07-01 05:50:50.779 eqi, eqj = i, j
2025-07-01 05:50:50.784 continue
2025-07-01 05:50:50.793 cruncher.set_seq1(ai)
2025-07-01 05:50:50.805 # computing similarity is expensive, so use the quick
2025-07-01 05:50:50.813 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:50.819 # compares by a factor of 3.
2025-07-01 05:50:50.825 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:50.830 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:50.836 # of the computation is cached by cruncher
2025-07-01 05:50:50.842 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:50.847 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:50.854 cruncher.ratio() > best_ratio:
2025-07-01 05:50:50.860 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:50.866 if best_ratio < cutoff:
2025-07-01 05:50:50.873 # no non-identical "pretty close" pair
2025-07-01 05:50:50.880 if eqi is None:
2025-07-01 05:50:50.888 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:50.894 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:50.901 return
2025-07-01 05:50:50.907 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:50.913 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:50.918 else:
2025-07-01 05:50:50.926 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:50.932 eqi = None
2025-07-01 05:50:50.939
2025-07-01 05:50:50.947 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:50.956 # identical
2025-07-01 05:50:50.969
2025-07-01 05:50:50.978 # pump out diffs from before the synch point
2025-07-01 05:50:50.983 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:50.988
2025-07-01 05:50:50.993 # do intraline marking on the synch pair
2025-07-01 05:50:50.997 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:51.001 if eqi is None:
2025-07-01 05:50:51.009 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:51.014 atags = btags = ""
2025-07-01 05:50:51.024 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:51.033 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:51.044 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:51.053 if tag == 'replace':
2025-07-01 05:50:51.060 atags += '^' * la
2025-07-01 05:50:51.066 btags += '^' * lb
2025-07-01 05:50:51.073 elif tag == 'delete':
2025-07-01 05:50:51.080 atags += '-' * la
2025-07-01 05:50:51.086 elif tag == 'insert':
2025-07-01 05:50:51.092 btags += '+' * lb
2025-07-01 05:50:51.098 elif tag == 'equal':
2025-07-01 05:50:51.109 atags += ' ' * la
2025-07-01 05:50:51.117 btags += ' ' * lb
2025-07-01 05:50:51.123 else:
2025-07-01 05:50:51.130 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:51.142 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:51.153 else:
2025-07-01 05:50:51.166 # the synch pair is identical
2025-07-01 05:50:51.174 yield ' ' + aelt
2025-07-01 05:50:51.183
2025-07-01 05:50:51.196 # pump out diffs from after the synch point
2025-07-01 05:50:51.208 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:51.217
2025-07-01 05:50:51.225 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:51.231 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:51.238
2025-07-01 05:50:51.244 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:51.250 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:51.258 alo = 467, ahi = 1101
2025-07-01 05:50:51.265 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:51.271 blo = 467, bhi = 1101
2025-07-01 05:50:51.277
2025-07-01 05:50:51.283 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:51.295 g = []
2025-07-01 05:50:51.306 if alo < ahi:
2025-07-01 05:50:51.313 if blo < bhi:
2025-07-01 05:50:51.318 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:51.322 else:
2025-07-01 05:50:51.327 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:51.332 elif blo < bhi:
2025-07-01 05:50:51.336 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:51.342
2025-07-01 05:50:51.348 > yield from g
2025-07-01 05:50:51.354
2025-07-01 05:50:51.361 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:51.373 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:51.381
2025-07-01 05:50:51.388 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:51.395 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:51.401 alo = 467, ahi = 1101
2025-07-01 05:50:51.408 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:51.414 blo = 467, bhi = 1101
2025-07-01 05:50:51.420
2025-07-01 05:50:51.426 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:51.434 r"""
2025-07-01 05:50:51.445 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:51.455 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:51.465 synch point, and intraline difference marking is done on the
2025-07-01 05:50:51.473 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:51.479
2025-07-01 05:50:51.485 Example:
2025-07-01 05:50:51.491
2025-07-01 05:50:51.503 >>> d = Differ()
2025-07-01 05:50:51.515 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:51.526 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:51.537 >>> print(''.join(results), end="")
2025-07-01 05:50:51.547 - abcDefghiJkl
2025-07-01 05:50:51.566 + abcdefGhijkl
2025-07-01 05:50:51.579 """
2025-07-01 05:50:51.586
2025-07-01 05:50:51.595 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:51.603 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:51.610 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:51.621 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:51.631 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:51.644
2025-07-01 05:50:51.653 # search for the pair that matches best without being identical
2025-07-01 05:50:51.661 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:51.667 # on junk -- unless we have to)
2025-07-01 05:50:51.678 for j in range(blo, bhi):
2025-07-01 05:50:51.688 bj = b[j]
2025-07-01 05:50:51.695 cruncher.set_seq2(bj)
2025-07-01 05:50:51.701 for i in range(alo, ahi):
2025-07-01 05:50:51.707 ai = a[i]
2025-07-01 05:50:51.714 if ai == bj:
2025-07-01 05:50:51.722 if eqi is None:
2025-07-01 05:50:51.728 eqi, eqj = i, j
2025-07-01 05:50:51.734 continue
2025-07-01 05:50:51.738 cruncher.set_seq1(ai)
2025-07-01 05:50:51.742 # computing similarity is expensive, so use the quick
2025-07-01 05:50:51.748 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:51.754 # compares by a factor of 3.
2025-07-01 05:50:51.760 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:51.765 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:51.770 # of the computation is cached by cruncher
2025-07-01 05:50:51.778 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:51.786 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:51.793 cruncher.ratio() > best_ratio:
2025-07-01 05:50:51.800 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:51.806 if best_ratio < cutoff:
2025-07-01 05:50:51.816 # no non-identical "pretty close" pair
2025-07-01 05:50:51.823 if eqi is None:
2025-07-01 05:50:51.829 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:51.835 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:51.841 return
2025-07-01 05:50:51.846 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:51.852 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:51.858 else:
2025-07-01 05:50:51.863 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:51.869 eqi = None
2025-07-01 05:50:51.875
2025-07-01 05:50:51.882 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:51.887 # identical
2025-07-01 05:50:51.894
2025-07-01 05:50:51.903 # pump out diffs from before the synch point
2025-07-01 05:50:51.913 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:51.922
2025-07-01 05:50:51.928 # do intraline marking on the synch pair
2025-07-01 05:50:51.934 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:51.939 if eqi is None:
2025-07-01 05:50:51.946 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:51.951 atags = btags = ""
2025-07-01 05:50:51.957 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:51.964 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:51.970 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:51.982 if tag == 'replace':
2025-07-01 05:50:51.990 atags += '^' * la
2025-07-01 05:50:51.996 btags += '^' * lb
2025-07-01 05:50:52.002 elif tag == 'delete':
2025-07-01 05:50:52.009 atags += '-' * la
2025-07-01 05:50:52.016 elif tag == 'insert':
2025-07-01 05:50:52.022 btags += '+' * lb
2025-07-01 05:50:52.028 elif tag == 'equal':
2025-07-01 05:50:52.034 atags += ' ' * la
2025-07-01 05:50:52.040 btags += ' ' * lb
2025-07-01 05:50:52.046 else:
2025-07-01 05:50:52.051 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:52.056 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:52.061 else:
2025-07-01 05:50:52.066 # the synch pair is identical
2025-07-01 05:50:52.071 yield ' ' + aelt
2025-07-01 05:50:52.076
2025-07-01 05:50:52.082 # pump out diffs from after the synch point
2025-07-01 05:50:52.091 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:52.100
2025-07-01 05:50:52.106 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:52.113 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:52.118
2025-07-01 05:50:52.124 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:52.130 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:52.136 alo = 468, ahi = 1101
2025-07-01 05:50:52.143 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:52.149 blo = 468, bhi = 1101
2025-07-01 05:50:52.156
2025-07-01 05:50:52.163 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:52.171 g = []
2025-07-01 05:50:52.182 if alo < ahi:
2025-07-01 05:50:52.193 if blo < bhi:
2025-07-01 05:50:52.202 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:52.209 else:
2025-07-01 05:50:52.215 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:52.221 elif blo < bhi:
2025-07-01 05:50:52.226 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:52.235
2025-07-01 05:50:52.244 > yield from g
2025-07-01 05:50:52.251
2025-07-01 05:50:52.256 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:52.260 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:52.265
2025-07-01 05:50:52.271 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:52.277 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:52.283 alo = 468, ahi = 1101
2025-07-01 05:50:52.290 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:52.295 blo = 468, bhi = 1101
2025-07-01 05:50:52.301
2025-07-01 05:50:52.307 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:52.317 r"""
2025-07-01 05:50:52.326 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:52.334 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:52.341 synch point, and intraline difference marking is done on the
2025-07-01 05:50:52.347 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:52.355
2025-07-01 05:50:52.366 Example:
2025-07-01 05:50:52.375
2025-07-01 05:50:52.383 >>> d = Differ()
2025-07-01 05:50:52.393 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:52.405 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:52.416 >>> print(''.join(results), end="")
2025-07-01 05:50:52.426 - abcDefghiJkl
2025-07-01 05:50:52.448 + abcdefGhijkl
2025-07-01 05:50:52.462 """
2025-07-01 05:50:52.467
2025-07-01 05:50:52.473 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:52.478 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:52.484 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:52.490 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:52.497 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:52.503
2025-07-01 05:50:52.510 # search for the pair that matches best without being identical
2025-07-01 05:50:52.515 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:52.523 # on junk -- unless we have to)
2025-07-01 05:50:52.534 for j in range(blo, bhi):
2025-07-01 05:50:52.543 bj = b[j]
2025-07-01 05:50:52.551 cruncher.set_seq2(bj)
2025-07-01 05:50:52.557 for i in range(alo, ahi):
2025-07-01 05:50:52.563 ai = a[i]
2025-07-01 05:50:52.575 if ai == bj:
2025-07-01 05:50:52.585 if eqi is None:
2025-07-01 05:50:52.592 eqi, eqj = i, j
2025-07-01 05:50:52.599 continue
2025-07-01 05:50:52.609 cruncher.set_seq1(ai)
2025-07-01 05:50:52.617 # computing similarity is expensive, so use the quick
2025-07-01 05:50:52.628 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:52.637 # compares by a factor of 3.
2025-07-01 05:50:52.646 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:52.657 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:52.663 # of the computation is cached by cruncher
2025-07-01 05:50:52.668 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:52.673 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:52.682 cruncher.ratio() > best_ratio:
2025-07-01 05:50:52.688 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:52.693 if best_ratio < cutoff:
2025-07-01 05:50:52.698 # no non-identical "pretty close" pair
2025-07-01 05:50:52.705 if eqi is None:
2025-07-01 05:50:52.712 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:52.719 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:52.728 return
2025-07-01 05:50:52.735 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:52.743 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:52.749 else:
2025-07-01 05:50:52.756 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:52.762 eqi = None
2025-07-01 05:50:52.768
2025-07-01 05:50:52.775 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:52.782 # identical
2025-07-01 05:50:52.793
2025-07-01 05:50:52.804 # pump out diffs from before the synch point
2025-07-01 05:50:52.816 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:52.827
2025-07-01 05:50:52.834 # do intraline marking on the synch pair
2025-07-01 05:50:52.844 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:52.854 if eqi is None:
2025-07-01 05:50:52.866 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:52.875 atags = btags = ""
2025-07-01 05:50:52.881 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:52.887 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:52.894 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:52.898 if tag == 'replace':
2025-07-01 05:50:52.902 atags += '^' * la
2025-07-01 05:50:52.907 btags += '^' * lb
2025-07-01 05:50:52.911 elif tag == 'delete':
2025-07-01 05:50:52.916 atags += '-' * la
2025-07-01 05:50:52.920 elif tag == 'insert':
2025-07-01 05:50:52.930 btags += '+' * lb
2025-07-01 05:50:52.937 elif tag == 'equal':
2025-07-01 05:50:52.947 atags += ' ' * la
2025-07-01 05:50:52.957 btags += ' ' * lb
2025-07-01 05:50:52.965 else:
2025-07-01 05:50:52.976 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:52.987 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:52.996 else:
2025-07-01 05:50:53.004 # the synch pair is identical
2025-07-01 05:50:53.011 yield ' ' + aelt
2025-07-01 05:50:53.020
2025-07-01 05:50:53.026 # pump out diffs from after the synch point
2025-07-01 05:50:53.032 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:53.037
2025-07-01 05:50:53.043 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:53.049 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:53.055
2025-07-01 05:50:53.067 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:53.080 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:53.089 alo = 469, ahi = 1101
2025-07-01 05:50:53.097 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:53.103 blo = 469, bhi = 1101
2025-07-01 05:50:53.109
2025-07-01 05:50:53.116 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:53.123 g = []
2025-07-01 05:50:53.132 if alo < ahi:
2025-07-01 05:50:53.145 if blo < bhi:
2025-07-01 05:50:53.156 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:53.168 else:
2025-07-01 05:50:53.179 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:53.189 elif blo < bhi:
2025-07-01 05:50:53.201 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:53.211
2025-07-01 05:50:53.222 > yield from g
2025-07-01 05:50:53.231
2025-07-01 05:50:53.241 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:53.251 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:53.258
2025-07-01 05:50:53.266 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:53.278 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:53.286 alo = 469, ahi = 1101
2025-07-01 05:50:53.293 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:53.298 blo = 469, bhi = 1101
2025-07-01 05:50:53.303
2025-07-01 05:50:53.307 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:53.312 r"""
2025-07-01 05:50:53.317 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:53.322 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:53.335 synch point, and intraline difference marking is done on the
2025-07-01 05:50:53.344 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:53.352
2025-07-01 05:50:53.362 Example:
2025-07-01 05:50:53.374
2025-07-01 05:50:53.387 >>> d = Differ()
2025-07-01 05:50:53.398 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:53.409 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:53.423 >>> print(''.join(results), end="")
2025-07-01 05:50:53.433 - abcDefghiJkl
2025-07-01 05:50:53.448 + abcdefGhijkl
2025-07-01 05:50:53.467 """
2025-07-01 05:50:53.475
2025-07-01 05:50:53.488 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:53.497 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:53.504 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:53.511 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:53.522 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:53.536
2025-07-01 05:50:53.546 # search for the pair that matches best without being identical
2025-07-01 05:50:53.552 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:53.560 # on junk -- unless we have to)
2025-07-01 05:50:53.568 for j in range(blo, bhi):
2025-07-01 05:50:53.576 bj = b[j]
2025-07-01 05:50:53.582 cruncher.set_seq2(bj)
2025-07-01 05:50:53.588 for i in range(alo, ahi):
2025-07-01 05:50:53.594 ai = a[i]
2025-07-01 05:50:53.600 if ai == bj:
2025-07-01 05:50:53.607 if eqi is None:
2025-07-01 05:50:53.613 eqi, eqj = i, j
2025-07-01 05:50:53.619 continue
2025-07-01 05:50:53.624 cruncher.set_seq1(ai)
2025-07-01 05:50:53.630 # computing similarity is expensive, so use the quick
2025-07-01 05:50:53.636 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:53.644 # compares by a factor of 3.
2025-07-01 05:50:53.656 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:53.666 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:53.673 # of the computation is cached by cruncher
2025-07-01 05:50:53.682 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:53.692 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:53.700 cruncher.ratio() > best_ratio:
2025-07-01 05:50:53.712 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:53.722 if best_ratio < cutoff:
2025-07-01 05:50:53.734 # no non-identical "pretty close" pair
2025-07-01 05:50:53.745 if eqi is None:
2025-07-01 05:50:53.753 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:53.764 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:53.775 return
2025-07-01 05:50:53.785 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:53.797 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:53.810 else:
2025-07-01 05:50:53.821 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:53.832 eqi = None
2025-07-01 05:50:53.841
2025-07-01 05:50:53.849 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:53.856 # identical
2025-07-01 05:50:53.862
2025-07-01 05:50:53.868 # pump out diffs from before the synch point
2025-07-01 05:50:53.874 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:53.886
2025-07-01 05:50:53.897 # do intraline marking on the synch pair
2025-07-01 05:50:53.905 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:53.913 if eqi is None:
2025-07-01 05:50:53.925 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:53.933 atags = btags = ""
2025-07-01 05:50:53.940 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:53.947 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:53.953 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:53.965 if tag == 'replace':
2025-07-01 05:50:53.975 atags += '^' * la
2025-07-01 05:50:53.986 btags += '^' * lb
2025-07-01 05:50:53.997 elif tag == 'delete':
2025-07-01 05:50:54.009 atags += '-' * la
2025-07-01 05:50:54.020 elif tag == 'insert':
2025-07-01 05:50:54.029 btags += '+' * lb
2025-07-01 05:50:54.041 elif tag == 'equal':
2025-07-01 05:50:54.050 atags += ' ' * la
2025-07-01 05:50:54.059 btags += ' ' * lb
2025-07-01 05:50:54.069 else:
2025-07-01 05:50:54.076 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:54.082 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:54.088 else:
2025-07-01 05:50:54.100 # the synch pair is identical
2025-07-01 05:50:54.112 yield ' ' + aelt
2025-07-01 05:50:54.121
2025-07-01 05:50:54.128 # pump out diffs from after the synch point
2025-07-01 05:50:54.140 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:54.150
2025-07-01 05:50:54.161 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:54.172 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:54.181
2025-07-01 05:50:54.189 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:54.198 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:54.204 alo = 470, ahi = 1101
2025-07-01 05:50:54.218 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:54.225 blo = 470, bhi = 1101
2025-07-01 05:50:54.230
2025-07-01 05:50:54.235 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:54.240 g = []
2025-07-01 05:50:54.247 if alo < ahi:
2025-07-01 05:50:54.257 if blo < bhi:
2025-07-01 05:50:54.264 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:54.270 else:
2025-07-01 05:50:54.276 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:54.282 elif blo < bhi:
2025-07-01 05:50:54.290 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:54.297
2025-07-01 05:50:54.304 > yield from g
2025-07-01 05:50:54.310
2025-07-01 05:50:54.316 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:54.326 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:54.338
2025-07-01 05:50:54.348 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:54.356 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:54.363 alo = 470, ahi = 1101
2025-07-01 05:50:54.376 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:54.388 blo = 470, bhi = 1101
2025-07-01 05:50:54.399
2025-07-01 05:50:54.408 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:54.417 r"""
2025-07-01 05:50:54.424 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:54.430 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:54.437 synch point, and intraline difference marking is done on the
2025-07-01 05:50:54.443 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:54.449
2025-07-01 05:50:54.454 Example:
2025-07-01 05:50:54.464
2025-07-01 05:50:54.474 >>> d = Differ()
2025-07-01 05:50:54.483 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:54.490 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:54.497 >>> print(''.join(results), end="")
2025-07-01 05:50:54.502 - abcDefghiJkl
2025-07-01 05:50:54.524 + abcdefGhijkl
2025-07-01 05:50:54.539 """
2025-07-01 05:50:54.547
2025-07-01 05:50:54.557 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:54.567 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:54.579 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:54.590 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:54.600 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:54.612
2025-07-01 05:50:54.624 # search for the pair that matches best without being identical
2025-07-01 05:50:54.635 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:54.643 # on junk -- unless we have to)
2025-07-01 05:50:54.650 for j in range(blo, bhi):
2025-07-01 05:50:54.661 bj = b[j]
2025-07-01 05:50:54.670 cruncher.set_seq2(bj)
2025-07-01 05:50:54.679 for i in range(alo, ahi):
2025-07-01 05:50:54.687 ai = a[i]
2025-07-01 05:50:54.695 if ai == bj:
2025-07-01 05:50:54.702 if eqi is None:
2025-07-01 05:50:54.713 eqi, eqj = i, j
2025-07-01 05:50:54.722 continue
2025-07-01 05:50:54.729 cruncher.set_seq1(ai)
2025-07-01 05:50:54.742 # computing similarity is expensive, so use the quick
2025-07-01 05:50:54.752 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:54.763 # compares by a factor of 3.
2025-07-01 05:50:54.770 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:54.779 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:54.789 # of the computation is cached by cruncher
2025-07-01 05:50:54.798 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:54.806 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:54.812 cruncher.ratio() > best_ratio:
2025-07-01 05:50:54.818 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:54.824 if best_ratio < cutoff:
2025-07-01 05:50:54.830 # no non-identical "pretty close" pair
2025-07-01 05:50:54.835 if eqi is None:
2025-07-01 05:50:54.840 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:54.845 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:54.853 return
2025-07-01 05:50:54.863 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:54.871 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:54.878 else:
2025-07-01 05:50:54.885 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:54.892 eqi = None
2025-07-01 05:50:54.898
2025-07-01 05:50:54.910 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:54.919 # identical
2025-07-01 05:50:54.927
2025-07-01 05:50:54.938 # pump out diffs from before the synch point
2025-07-01 05:50:54.949 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:54.960
2025-07-01 05:50:54.972 # do intraline marking on the synch pair
2025-07-01 05:50:54.983 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:54.991 if eqi is None:
2025-07-01 05:50:54.999 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:55.007 atags = btags = ""
2025-07-01 05:50:55.017 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:55.025 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:55.030 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:55.036 if tag == 'replace':
2025-07-01 05:50:55.042 atags += '^' * la
2025-07-01 05:50:55.048 btags += '^' * lb
2025-07-01 05:50:55.054 elif tag == 'delete':
2025-07-01 05:50:55.060 atags += '-' * la
2025-07-01 05:50:55.065 elif tag == 'insert':
2025-07-01 05:50:55.069 btags += '+' * lb
2025-07-01 05:50:55.074 elif tag == 'equal':
2025-07-01 05:50:55.080 atags += ' ' * la
2025-07-01 05:50:55.087 btags += ' ' * lb
2025-07-01 05:50:55.093 else:
2025-07-01 05:50:55.099 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:55.111 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:55.122 else:
2025-07-01 05:50:55.132 # the synch pair is identical
2025-07-01 05:50:55.139 yield ' ' + aelt
2025-07-01 05:50:55.149
2025-07-01 05:50:55.156 # pump out diffs from after the synch point
2025-07-01 05:50:55.163 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:55.170
2025-07-01 05:50:55.175 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:55.181 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:55.187
2025-07-01 05:50:55.194 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:55.202 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:55.208 alo = 471, ahi = 1101
2025-07-01 05:50:55.214 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:55.219 blo = 471, bhi = 1101
2025-07-01 05:50:55.224
2025-07-01 05:50:55.229 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:55.234 g = []
2025-07-01 05:50:55.238 if alo < ahi:
2025-07-01 05:50:55.244 if blo < bhi:
2025-07-01 05:50:55.248 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:55.253 else:
2025-07-01 05:50:55.257 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:55.263 elif blo < bhi:
2025-07-01 05:50:55.269 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:55.274
2025-07-01 05:50:55.284 > yield from g
2025-07-01 05:50:55.293
2025-07-01 05:50:55.304 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:55.315 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:55.323
2025-07-01 05:50:55.331 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:55.338 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:55.344 alo = 471, ahi = 1101
2025-07-01 05:50:55.351 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:55.359 blo = 471, bhi = 1101
2025-07-01 05:50:55.369
2025-07-01 05:50:55.379 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:55.386 r"""
2025-07-01 05:50:55.399 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:55.410 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:55.416 synch point, and intraline difference marking is done on the
2025-07-01 05:50:55.422 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:55.427
2025-07-01 05:50:55.439 Example:
2025-07-01 05:50:55.450
2025-07-01 05:50:55.461 >>> d = Differ()
2025-07-01 05:50:55.471 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:55.479 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:55.488 >>> print(''.join(results), end="")
2025-07-01 05:50:55.498 - abcDefghiJkl
2025-07-01 05:50:55.517 + abcdefGhijkl
2025-07-01 05:50:55.539 """
2025-07-01 05:50:55.548
2025-07-01 05:50:55.555 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:55.562 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:55.573 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:55.582 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:55.589 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:55.601
2025-07-01 05:50:55.612 # search for the pair that matches best without being identical
2025-07-01 05:50:55.621 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:55.627 # on junk -- unless we have to)
2025-07-01 05:50:55.633 for j in range(blo, bhi):
2025-07-01 05:50:55.639 bj = b[j]
2025-07-01 05:50:55.649 cruncher.set_seq2(bj)
2025-07-01 05:50:55.659 for i in range(alo, ahi):
2025-07-01 05:50:55.667 ai = a[i]
2025-07-01 05:50:55.672 if ai == bj:
2025-07-01 05:50:55.677 if eqi is None:
2025-07-01 05:50:55.683 eqi, eqj = i, j
2025-07-01 05:50:55.690 continue
2025-07-01 05:50:55.699 cruncher.set_seq1(ai)
2025-07-01 05:50:55.707 # computing similarity is expensive, so use the quick
2025-07-01 05:50:55.717 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:55.724 # compares by a factor of 3.
2025-07-01 05:50:55.731 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:55.738 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:55.746 # of the computation is cached by cruncher
2025-07-01 05:50:55.753 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:55.759 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:55.765 cruncher.ratio() > best_ratio:
2025-07-01 05:50:55.771 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:55.778 if best_ratio < cutoff:
2025-07-01 05:50:55.791 # no non-identical "pretty close" pair
2025-07-01 05:50:55.800 if eqi is None:
2025-07-01 05:50:55.808 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:55.815 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:55.822 return
2025-07-01 05:50:55.832 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:55.840 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:55.846 else:
2025-07-01 05:50:55.853 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:55.859 eqi = None
2025-07-01 05:50:55.866
2025-07-01 05:50:55.872 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:55.878 # identical
2025-07-01 05:50:55.889
2025-07-01 05:50:55.902 # pump out diffs from before the synch point
2025-07-01 05:50:55.910 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:55.917
2025-07-01 05:50:55.924 # do intraline marking on the synch pair
2025-07-01 05:50:55.930 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:55.939 if eqi is None:
2025-07-01 05:50:55.947 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:55.954 atags = btags = ""
2025-07-01 05:50:55.964 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:55.973 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:55.983 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:55.993 if tag == 'replace':
2025-07-01 05:50:56.004 atags += '^' * la
2025-07-01 05:50:56.014 btags += '^' * lb
2025-07-01 05:50:56.026 elif tag == 'delete':
2025-07-01 05:50:56.035 atags += '-' * la
2025-07-01 05:50:56.043 elif tag == 'insert':
2025-07-01 05:50:56.051 btags += '+' * lb
2025-07-01 05:50:56.061 elif tag == 'equal':
2025-07-01 05:50:56.069 atags += ' ' * la
2025-07-01 05:50:56.076 btags += ' ' * lb
2025-07-01 05:50:56.084 else:
2025-07-01 05:50:56.097 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:56.107 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:56.116 else:
2025-07-01 05:50:56.123 # the synch pair is identical
2025-07-01 05:50:56.135 yield ' ' + aelt
2025-07-01 05:50:56.145
2025-07-01 05:50:56.157 # pump out diffs from after the synch point
2025-07-01 05:50:56.167 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:56.175
2025-07-01 05:50:56.183 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:56.189 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:56.196
2025-07-01 05:50:56.206 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:56.219 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:56.226 alo = 472, ahi = 1101
2025-07-01 05:50:56.234 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:56.239 blo = 472, bhi = 1101
2025-07-01 05:50:56.245
2025-07-01 05:50:56.251 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:56.257 g = []
2025-07-01 05:50:56.263 if alo < ahi:
2025-07-01 05:50:56.270 if blo < bhi:
2025-07-01 05:50:56.277 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:56.284 else:
2025-07-01 05:50:56.291 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:56.297 elif blo < bhi:
2025-07-01 05:50:56.303 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:56.309
2025-07-01 05:50:56.314 > yield from g
2025-07-01 05:50:56.318
2025-07-01 05:50:56.323 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:56.329 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:56.334
2025-07-01 05:50:56.341 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:56.353 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:56.363 alo = 472, ahi = 1101
2025-07-01 05:50:56.370 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:56.377 blo = 472, bhi = 1101
2025-07-01 05:50:56.383
2025-07-01 05:50:56.389 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:56.394 r"""
2025-07-01 05:50:56.402 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:56.417 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:56.428 synch point, and intraline difference marking is done on the
2025-07-01 05:50:56.437 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:56.443
2025-07-01 05:50:56.449 Example:
2025-07-01 05:50:56.455
2025-07-01 05:50:56.460 >>> d = Differ()
2025-07-01 05:50:56.465 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:56.471 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:56.477 >>> print(''.join(results), end="")
2025-07-01 05:50:56.484 - abcDefghiJkl
2025-07-01 05:50:56.500 + abcdefGhijkl
2025-07-01 05:50:56.519 """
2025-07-01 05:50:56.525
2025-07-01 05:50:56.531 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:56.538 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:56.545 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:56.552 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:56.560 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:56.566
2025-07-01 05:50:56.579 # search for the pair that matches best without being identical
2025-07-01 05:50:56.590 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:56.601 # on junk -- unless we have to)
2025-07-01 05:50:56.609 for j in range(blo, bhi):
2025-07-01 05:50:56.617 bj = b[j]
2025-07-01 05:50:56.624 cruncher.set_seq2(bj)
2025-07-01 05:50:56.631 for i in range(alo, ahi):
2025-07-01 05:50:56.639 ai = a[i]
2025-07-01 05:50:56.646 if ai == bj:
2025-07-01 05:50:56.653 if eqi is None:
2025-07-01 05:50:56.667 eqi, eqj = i, j
2025-07-01 05:50:56.677 continue
2025-07-01 05:50:56.684 cruncher.set_seq1(ai)
2025-07-01 05:50:56.690 # computing similarity is expensive, so use the quick
2025-07-01 05:50:56.696 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:56.702 # compares by a factor of 3.
2025-07-01 05:50:56.709 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:56.715 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:56.721 # of the computation is cached by cruncher
2025-07-01 05:50:56.727 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:56.732 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:56.738 cruncher.ratio() > best_ratio:
2025-07-01 05:50:56.744 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:56.749 if best_ratio < cutoff:
2025-07-01 05:50:56.755 # no non-identical "pretty close" pair
2025-07-01 05:50:56.760 if eqi is None:
2025-07-01 05:50:56.767 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:56.774 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:56.781 return
2025-07-01 05:50:56.787 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:56.793 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:56.799 else:
2025-07-01 05:50:56.806 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:56.816 eqi = None
2025-07-01 05:50:56.825
2025-07-01 05:50:56.832 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:56.838 # identical
2025-07-01 05:50:56.843
2025-07-01 05:50:56.849 # pump out diffs from before the synch point
2025-07-01 05:50:56.854 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:56.861
2025-07-01 05:50:56.867 # do intraline marking on the synch pair
2025-07-01 05:50:56.875 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:56.883 if eqi is None:
2025-07-01 05:50:56.890 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:56.898 atags = btags = ""
2025-07-01 05:50:56.910 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:56.920 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:56.932 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:56.944 if tag == 'replace':
2025-07-01 05:50:56.955 atags += '^' * la
2025-07-01 05:50:56.963 btags += '^' * lb
2025-07-01 05:50:56.969 elif tag == 'delete':
2025-07-01 05:50:56.974 atags += '-' * la
2025-07-01 05:50:56.979 elif tag == 'insert':
2025-07-01 05:50:56.984 btags += '+' * lb
2025-07-01 05:50:56.991 elif tag == 'equal':
2025-07-01 05:50:56.998 atags += ' ' * la
2025-07-01 05:50:57.006 btags += ' ' * lb
2025-07-01 05:50:57.017 else:
2025-07-01 05:50:57.027 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:57.033 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:57.040 else:
2025-07-01 05:50:57.047 # the synch pair is identical
2025-07-01 05:50:57.054 yield ' ' + aelt
2025-07-01 05:50:57.061
2025-07-01 05:50:57.068 # pump out diffs from after the synch point
2025-07-01 05:50:57.075 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:57.081
2025-07-01 05:50:57.088 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:57.095 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:57.102
2025-07-01 05:50:57.111 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:57.121 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:57.131 alo = 473, ahi = 1101
2025-07-01 05:50:57.140 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:57.147 blo = 473, bhi = 1101
2025-07-01 05:50:57.153
2025-07-01 05:50:57.163 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:57.175 g = []
2025-07-01 05:50:57.187 if alo < ahi:
2025-07-01 05:50:57.200 if blo < bhi:
2025-07-01 05:50:57.211 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:57.223 else:
2025-07-01 05:50:57.236 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:57.247 elif blo < bhi:
2025-07-01 05:50:57.258 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:57.268
2025-07-01 05:50:57.277 > yield from g
2025-07-01 05:50:57.283
2025-07-01 05:50:57.290 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:57.300 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:57.310
2025-07-01 05:50:57.318 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:57.325 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:57.331 alo = 473, ahi = 1101
2025-07-01 05:50:57.337 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:57.347 blo = 473, bhi = 1101
2025-07-01 05:50:57.353
2025-07-01 05:50:57.358 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:57.364 r"""
2025-07-01 05:50:57.370 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:57.380 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:57.391 synch point, and intraline difference marking is done on the
2025-07-01 05:50:57.402 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:57.413
2025-07-01 05:50:57.424 Example:
2025-07-01 05:50:57.435
2025-07-01 05:50:57.445 >>> d = Differ()
2025-07-01 05:50:57.454 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:57.463 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:57.471 >>> print(''.join(results), end="")
2025-07-01 05:50:57.479 - abcDefghiJkl
2025-07-01 05:50:57.492 + abcdefGhijkl
2025-07-01 05:50:57.504 """
2025-07-01 05:50:57.510
2025-07-01 05:50:57.521 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:57.531 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:57.539 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:57.546 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:57.553 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:57.559
2025-07-01 05:50:57.565 # search for the pair that matches best without being identical
2025-07-01 05:50:57.572 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:57.579 # on junk -- unless we have to)
2025-07-01 05:50:57.584 for j in range(blo, bhi):
2025-07-01 05:50:57.595 bj = b[j]
2025-07-01 05:50:57.606 cruncher.set_seq2(bj)
2025-07-01 05:50:57.618 for i in range(alo, ahi):
2025-07-01 05:50:57.629 ai = a[i]
2025-07-01 05:50:57.640 if ai == bj:
2025-07-01 05:50:57.649 if eqi is None:
2025-07-01 05:50:57.662 eqi, eqj = i, j
2025-07-01 05:50:57.675 continue
2025-07-01 05:50:57.687 cruncher.set_seq1(ai)
2025-07-01 05:50:57.696 # computing similarity is expensive, so use the quick
2025-07-01 05:50:57.707 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:57.714 # compares by a factor of 3.
2025-07-01 05:50:57.724 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:57.735 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:57.746 # of the computation is cached by cruncher
2025-07-01 05:50:57.755 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:57.763 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:57.771 cruncher.ratio() > best_ratio:
2025-07-01 05:50:57.779 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:57.790 if best_ratio < cutoff:
2025-07-01 05:50:57.797 # no non-identical "pretty close" pair
2025-07-01 05:50:57.804 if eqi is None:
2025-07-01 05:50:57.809 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:57.815 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:57.819 return
2025-07-01 05:50:57.824 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:57.829 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:57.836 else:
2025-07-01 05:50:57.843 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:57.849 eqi = None
2025-07-01 05:50:57.855
2025-07-01 05:50:57.862 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:57.871 # identical
2025-07-01 05:50:57.879
2025-07-01 05:50:57.887 # pump out diffs from before the synch point
2025-07-01 05:50:57.895 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:57.901
2025-07-01 05:50:57.907 # do intraline marking on the synch pair
2025-07-01 05:50:57.919 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:57.929 if eqi is None:
2025-07-01 05:50:57.938 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:57.946 atags = btags = ""
2025-07-01 05:50:57.955 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:57.966 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:57.975 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:57.983 if tag == 'replace':
2025-07-01 05:50:57.994 atags += '^' * la
2025-07-01 05:50:58.003 btags += '^' * lb
2025-07-01 05:50:58.010 elif tag == 'delete':
2025-07-01 05:50:58.022 atags += '-' * la
2025-07-01 05:50:58.030 elif tag == 'insert':
2025-07-01 05:50:58.039 btags += '+' * lb
2025-07-01 05:50:58.050 elif tag == 'equal':
2025-07-01 05:50:58.059 atags += ' ' * la
2025-07-01 05:50:58.068 btags += ' ' * lb
2025-07-01 05:50:58.079 else:
2025-07-01 05:50:58.088 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:58.096 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:58.103 else:
2025-07-01 05:50:58.115 # the synch pair is identical
2025-07-01 05:50:58.125 yield ' ' + aelt
2025-07-01 05:50:58.132
2025-07-01 05:50:58.139 # pump out diffs from after the synch point
2025-07-01 05:50:58.147 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:58.153
2025-07-01 05:50:58.159 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:58.167 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:58.178
2025-07-01 05:50:58.190 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:58.200 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:58.207 alo = 474, ahi = 1101
2025-07-01 05:50:58.214 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:58.220 blo = 474, bhi = 1101
2025-07-01 05:50:58.227
2025-07-01 05:50:58.238 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:58.247 g = []
2025-07-01 05:50:58.257 if alo < ahi:
2025-07-01 05:50:58.269 if blo < bhi:
2025-07-01 05:50:58.280 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:58.289 else:
2025-07-01 05:50:58.296 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:58.303 elif blo < bhi:
2025-07-01 05:50:58.311 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:58.322
2025-07-01 05:50:58.333 > yield from g
2025-07-01 05:50:58.345
2025-07-01 05:50:58.358 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:58.368 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:58.375
2025-07-01 05:50:58.382 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:58.390 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:58.400 alo = 474, ahi = 1101
2025-07-01 05:50:58.411 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:58.422 blo = 474, bhi = 1101
2025-07-01 05:50:58.431
2025-07-01 05:50:58.439 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:58.447 r"""
2025-07-01 05:50:58.459 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:58.470 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:58.480 synch point, and intraline difference marking is done on the
2025-07-01 05:50:58.490 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:58.498
2025-07-01 05:50:58.505 Example:
2025-07-01 05:50:58.512
2025-07-01 05:50:58.520 >>> d = Differ()
2025-07-01 05:50:58.530 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:58.539 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:58.546 >>> print(''.join(results), end="")
2025-07-01 05:50:58.551 - abcDefghiJkl
2025-07-01 05:50:58.569 + abcdefGhijkl
2025-07-01 05:50:58.585 """
2025-07-01 05:50:58.591
2025-07-01 05:50:58.597 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:58.603 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:58.609 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:58.621 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:58.631 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:58.645
2025-07-01 05:50:58.658 # search for the pair that matches best without being identical
2025-07-01 05:50:58.670 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:58.683 # on junk -- unless we have to)
2025-07-01 05:50:58.696 for j in range(blo, bhi):
2025-07-01 05:50:58.707 bj = b[j]
2025-07-01 05:50:58.715 cruncher.set_seq2(bj)
2025-07-01 05:50:58.724 for i in range(alo, ahi):
2025-07-01 05:50:58.733 ai = a[i]
2025-07-01 05:50:58.740 if ai == bj:
2025-07-01 05:50:58.747 if eqi is None:
2025-07-01 05:50:58.753 eqi, eqj = i, j
2025-07-01 05:50:58.760 continue
2025-07-01 05:50:58.767 cruncher.set_seq1(ai)
2025-07-01 05:50:58.775 # computing similarity is expensive, so use the quick
2025-07-01 05:50:58.786 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:58.794 # compares by a factor of 3.
2025-07-01 05:50:58.800 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:58.805 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:58.816 # of the computation is cached by cruncher
2025-07-01 05:50:58.827 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:58.835 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:58.843 cruncher.ratio() > best_ratio:
2025-07-01 05:50:58.851 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:58.862 if best_ratio < cutoff:
2025-07-01 05:50:58.873 # no non-identical "pretty close" pair
2025-07-01 05:50:58.883 if eqi is None:
2025-07-01 05:50:58.891 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:58.897 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:58.902 return
2025-07-01 05:50:58.908 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:58.913 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:58.918 else:
2025-07-01 05:50:58.924 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:58.930 eqi = None
2025-07-01 05:50:58.935
2025-07-01 05:50:58.940 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:58.947 # identical
2025-07-01 05:50:58.959
2025-07-01 05:50:58.969 # pump out diffs from before the synch point
2025-07-01 05:50:58.976 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:58.984
2025-07-01 05:50:58.991 # do intraline marking on the synch pair
2025-07-01 05:50:58.997 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:59.002 if eqi is None:
2025-07-01 05:50:59.009 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:59.015 atags = btags = ""
2025-07-01 05:50:59.021 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:59.033 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:50:59.042 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:50:59.050 if tag == 'replace':
2025-07-01 05:50:59.056 atags += '^' * la
2025-07-01 05:50:59.061 btags += '^' * lb
2025-07-01 05:50:59.067 elif tag == 'delete':
2025-07-01 05:50:59.072 atags += '-' * la
2025-07-01 05:50:59.077 elif tag == 'insert':
2025-07-01 05:50:59.081 btags += '+' * lb
2025-07-01 05:50:59.086 elif tag == 'equal':
2025-07-01 05:50:59.093 atags += ' ' * la
2025-07-01 05:50:59.106 btags += ' ' * lb
2025-07-01 05:50:59.116 else:
2025-07-01 05:50:59.125 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:50:59.131 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:50:59.138 else:
2025-07-01 05:50:59.142 # the synch pair is identical
2025-07-01 05:50:59.149 yield ' ' + aelt
2025-07-01 05:50:59.155
2025-07-01 05:50:59.161 # pump out diffs from after the synch point
2025-07-01 05:50:59.167 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:50:59.178
2025-07-01 05:50:59.187 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:50:59.199 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:59.208
2025-07-01 05:50:59.215 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:59.227 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:59.240 alo = 475, ahi = 1101
2025-07-01 05:50:59.253 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:59.262 blo = 475, bhi = 1101
2025-07-01 05:50:59.271
2025-07-01 05:50:59.282 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:59.296 g = []
2025-07-01 05:50:59.311 if alo < ahi:
2025-07-01 05:50:59.320 if blo < bhi:
2025-07-01 05:50:59.329 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:59.338 else:
2025-07-01 05:50:59.345 g = self._dump('-', a, alo, ahi)
2025-07-01 05:50:59.352 elif blo < bhi:
2025-07-01 05:50:59.359 g = self._dump('+', b, blo, bhi)
2025-07-01 05:50:59.365
2025-07-01 05:50:59.371 > yield from g
2025-07-01 05:50:59.377
2025-07-01 05:50:59.383 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:50:59.389 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:50:59.396
2025-07-01 05:50:59.401 self = <difflib.Differ object at [hex]>
2025-07-01 05:50:59.407 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:50:59.412 alo = 475, ahi = 1101
2025-07-01 05:50:59.423 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:50:59.432 blo = 475, bhi = 1101
2025-07-01 05:50:59.439
2025-07-01 05:50:59.445 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:50:59.451 r"""
2025-07-01 05:50:59.459 When replacing one block of lines with another, search the blocks
2025-07-01 05:50:59.467 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:50:59.480 synch point, and intraline difference marking is done on the
2025-07-01 05:50:59.488 similar pair. Lots of work, but often worth it.
2025-07-01 05:50:59.494
2025-07-01 05:50:59.499 Example:
2025-07-01 05:50:59.508
2025-07-01 05:50:59.516 >>> d = Differ()
2025-07-01 05:50:59.523 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:50:59.529 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:50:59.534 >>> print(''.join(results), end="")
2025-07-01 05:50:59.540 - abcDefghiJkl
2025-07-01 05:50:59.549 + abcdefGhijkl
2025-07-01 05:50:59.565 """
2025-07-01 05:50:59.571
2025-07-01 05:50:59.578 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:50:59.585 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:50:59.592 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:50:59.599 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:50:59.606 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:50:59.617
2025-07-01 05:50:59.627 # search for the pair that matches best without being identical
2025-07-01 05:50:59.634 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:50:59.642 # on junk -- unless we have to)
2025-07-01 05:50:59.650 for j in range(blo, bhi):
2025-07-01 05:50:59.661 bj = b[j]
2025-07-01 05:50:59.671 cruncher.set_seq2(bj)
2025-07-01 05:50:59.677 for i in range(alo, ahi):
2025-07-01 05:50:59.682 ai = a[i]
2025-07-01 05:50:59.695 if ai == bj:
2025-07-01 05:50:59.702 if eqi is None:
2025-07-01 05:50:59.710 eqi, eqj = i, j
2025-07-01 05:50:59.716 continue
2025-07-01 05:50:59.723 cruncher.set_seq1(ai)
2025-07-01 05:50:59.733 # computing similarity is expensive, so use the quick
2025-07-01 05:50:59.745 # upper bounds first -- have seen this speed up messy
2025-07-01 05:50:59.756 # compares by a factor of 3.
2025-07-01 05:50:59.766 # note that ratio() is only expensive to compute the first
2025-07-01 05:50:59.776 # time it's called on a sequence pair; the expensive part
2025-07-01 05:50:59.784 # of the computation is cached by cruncher
2025-07-01 05:50:59.796 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:50:59.805 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:50:59.816 cruncher.ratio() > best_ratio:
2025-07-01 05:50:59.825 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:50:59.835 if best_ratio < cutoff:
2025-07-01 05:50:59.845 # no non-identical "pretty close" pair
2025-07-01 05:50:59.856 if eqi is None:
2025-07-01 05:50:59.864 # no identical pair either -- treat it as a straight replace
2025-07-01 05:50:59.870 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:50:59.874 return
2025-07-01 05:50:59.879 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:50:59.884 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:50:59.888 else:
2025-07-01 05:50:59.892 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:50:59.897 eqi = None
2025-07-01 05:50:59.901
2025-07-01 05:50:59.905 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:50:59.910 # identical
2025-07-01 05:50:59.914
2025-07-01 05:50:59.919 # pump out diffs from before the synch point
2025-07-01 05:50:59.923 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:50:59.936
2025-07-01 05:50:59.946 # do intraline marking on the synch pair
2025-07-01 05:50:59.953 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:50:59.958 if eqi is None:
2025-07-01 05:50:59.964 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:50:59.970 atags = btags = ""
2025-07-01 05:50:59.983 cruncher.set_seqs(aelt, belt)
2025-07-01 05:50:59.993 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:00.001 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:00.009 if tag == 'replace':
2025-07-01 05:51:00.015 atags += '^' * la
2025-07-01 05:51:00.023 btags += '^' * lb
2025-07-01 05:51:00.033 elif tag == 'delete':
2025-07-01 05:51:00.041 atags += '-' * la
2025-07-01 05:51:00.050 elif tag == 'insert':
2025-07-01 05:51:00.063 btags += '+' * lb
2025-07-01 05:51:00.073 elif tag == 'equal':
2025-07-01 05:51:00.080 atags += ' ' * la
2025-07-01 05:51:00.085 btags += ' ' * lb
2025-07-01 05:51:00.092 else:
2025-07-01 05:51:00.100 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:00.106 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:00.114 else:
2025-07-01 05:51:00.120 # the synch pair is identical
2025-07-01 05:51:00.125 yield ' ' + aelt
2025-07-01 05:51:00.130
2025-07-01 05:51:00.137 # pump out diffs from after the synch point
2025-07-01 05:51:00.143 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:00.149
2025-07-01 05:51:00.154 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:00.159 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:00.165
2025-07-01 05:51:00.171 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:00.178 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:00.188 alo = 476, ahi = 1101
2025-07-01 05:51:00.197 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:00.206 blo = 476, bhi = 1101
2025-07-01 05:51:00.213
2025-07-01 05:51:00.219 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:00.227 g = []
2025-07-01 05:51:00.237 if alo < ahi:
2025-07-01 05:51:00.245 if blo < bhi:
2025-07-01 05:51:00.252 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:00.259 else:
2025-07-01 05:51:00.267 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:00.278 elif blo < bhi:
2025-07-01 05:51:00.287 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:00.294
2025-07-01 05:51:00.304 > yield from g
2025-07-01 05:51:00.313
2025-07-01 05:51:00.322 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:00.333 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:00.343
2025-07-01 05:51:00.351 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:00.359 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:00.366 alo = 476, ahi = 1101
2025-07-01 05:51:00.375 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:00.385 blo = 476, bhi = 1101
2025-07-01 05:51:00.393
2025-07-01 05:51:00.399 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:00.405 r"""
2025-07-01 05:51:00.411 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:00.416 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:00.422 synch point, and intraline difference marking is done on the
2025-07-01 05:51:00.432 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:00.440
2025-07-01 05:51:00.447 Example:
2025-07-01 05:51:00.453
2025-07-01 05:51:00.458 >>> d = Differ()
2025-07-01 05:51:00.463 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:00.468 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:00.473 >>> print(''.join(results), end="")
2025-07-01 05:51:00.477 - abcDefghiJkl
2025-07-01 05:51:00.486 + abcdefGhijkl
2025-07-01 05:51:00.495 """
2025-07-01 05:51:00.500
2025-07-01 05:51:00.505 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:00.511 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:00.519 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:00.527 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:00.534 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:00.540
2025-07-01 05:51:00.546 # search for the pair that matches best without being identical
2025-07-01 05:51:00.552 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:00.558 # on junk -- unless we have to)
2025-07-01 05:51:00.564 for j in range(blo, bhi):
2025-07-01 05:51:00.570 bj = b[j]
2025-07-01 05:51:00.576 cruncher.set_seq2(bj)
2025-07-01 05:51:00.582 for i in range(alo, ahi):
2025-07-01 05:51:00.588 ai = a[i]
2025-07-01 05:51:00.594 if ai == bj:
2025-07-01 05:51:00.600 if eqi is None:
2025-07-01 05:51:00.609 eqi, eqj = i, j
2025-07-01 05:51:00.620 continue
2025-07-01 05:51:00.628 cruncher.set_seq1(ai)
2025-07-01 05:51:00.635 # computing similarity is expensive, so use the quick
2025-07-01 05:51:00.642 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:00.648 # compares by a factor of 3.
2025-07-01 05:51:00.653 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:00.658 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:00.662 # of the computation is cached by cruncher
2025-07-01 05:51:00.668 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:00.673 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:00.679 cruncher.ratio() > best_ratio:
2025-07-01 05:51:00.685 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:00.690 if best_ratio < cutoff:
2025-07-01 05:51:00.700 # no non-identical "pretty close" pair
2025-07-01 05:51:00.711 if eqi is None:
2025-07-01 05:51:00.719 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:00.727 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:00.737 return
2025-07-01 05:51:00.749 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:00.758 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:00.766 else:
2025-07-01 05:51:00.777 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:00.788 eqi = None
2025-07-01 05:51:00.797
2025-07-01 05:51:00.807 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:00.815 # identical
2025-07-01 05:51:00.823
2025-07-01 05:51:00.835 # pump out diffs from before the synch point
2025-07-01 05:51:00.845 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:00.853
2025-07-01 05:51:00.861 # do intraline marking on the synch pair
2025-07-01 05:51:00.870 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:00.876 if eqi is None:
2025-07-01 05:51:00.882 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:00.888 atags = btags = ""
2025-07-01 05:51:00.895 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:00.903 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:00.914 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:00.923 if tag == 'replace':
2025-07-01 05:51:00.931 atags += '^' * la
2025-07-01 05:51:00.939 btags += '^' * lb
2025-07-01 05:51:00.947 elif tag == 'delete':
2025-07-01 05:51:00.958 atags += '-' * la
2025-07-01 05:51:00.966 elif tag == 'insert':
2025-07-01 05:51:00.973 btags += '+' * lb
2025-07-01 05:51:00.980 elif tag == 'equal':
2025-07-01 05:51:00.986 atags += ' ' * la
2025-07-01 05:51:00.992 btags += ' ' * lb
2025-07-01 05:51:01.002 else:
2025-07-01 05:51:01.009 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:01.017 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:01.023 else:
2025-07-01 05:51:01.031 # the synch pair is identical
2025-07-01 05:51:01.038 yield ' ' + aelt
2025-07-01 05:51:01.048
2025-07-01 05:51:01.057 # pump out diffs from after the synch point
2025-07-01 05:51:01.064 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:01.071
2025-07-01 05:51:01.076 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:01.083 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:01.088
2025-07-01 05:51:01.101 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:01.112 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:01.119 alo = 477, ahi = 1101
2025-07-01 05:51:01.131 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:01.140 blo = 477, bhi = 1101
2025-07-01 05:51:01.147
2025-07-01 05:51:01.154 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:01.160 g = []
2025-07-01 05:51:01.165 if alo < ahi:
2025-07-01 05:51:01.171 if blo < bhi:
2025-07-01 05:51:01.179 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:01.191 else:
2025-07-01 05:51:01.200 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:01.208 elif blo < bhi:
2025-07-01 05:51:01.216 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:01.226
2025-07-01 05:51:01.236 > yield from g
2025-07-01 05:51:01.244
2025-07-01 05:51:01.251 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:01.257 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:01.264
2025-07-01 05:51:01.271 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:01.283 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:01.292 alo = 477, ahi = 1101
2025-07-01 05:51:01.299 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:01.305 blo = 477, bhi = 1101
2025-07-01 05:51:01.310
2025-07-01 05:51:01.316 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:01.322 r"""
2025-07-01 05:51:01.328 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:01.334 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:01.341 synch point, and intraline difference marking is done on the
2025-07-01 05:51:01.347 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:01.354
2025-07-01 05:51:01.364 Example:
2025-07-01 05:51:01.372
2025-07-01 05:51:01.378 >>> d = Differ()
2025-07-01 05:51:01.387 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:01.396 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:01.407 >>> print(''.join(results), end="")
2025-07-01 05:51:01.413 - abcDefghiJkl
2025-07-01 05:51:01.425 + abcdefGhijkl
2025-07-01 05:51:01.437 """
2025-07-01 05:51:01.443
2025-07-01 05:51:01.448 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:01.453 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:01.458 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:01.462 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:01.467 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:01.472
2025-07-01 05:51:01.478 # search for the pair that matches best without being identical
2025-07-01 05:51:01.484 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:01.490 # on junk -- unless we have to)
2025-07-01 05:51:01.499 for j in range(blo, bhi):
2025-07-01 05:51:01.508 bj = b[j]
2025-07-01 05:51:01.515 cruncher.set_seq2(bj)
2025-07-01 05:51:01.520 for i in range(alo, ahi):
2025-07-01 05:51:01.526 ai = a[i]
2025-07-01 05:51:01.532 if ai == bj:
2025-07-01 05:51:01.538 if eqi is None:
2025-07-01 05:51:01.544 eqi, eqj = i, j
2025-07-01 05:51:01.551 continue
2025-07-01 05:51:01.559 cruncher.set_seq1(ai)
2025-07-01 05:51:01.566 # computing similarity is expensive, so use the quick
2025-07-01 05:51:01.572 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:01.577 # compares by a factor of 3.
2025-07-01 05:51:01.582 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:01.587 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:01.595 # of the computation is cached by cruncher
2025-07-01 05:51:01.607 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:01.616 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:01.622 cruncher.ratio() > best_ratio:
2025-07-01 05:51:01.628 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:01.635 if best_ratio < cutoff:
2025-07-01 05:51:01.641 # no non-identical "pretty close" pair
2025-07-01 05:51:01.646 if eqi is None:
2025-07-01 05:51:01.652 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:01.664 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:01.672 return
2025-07-01 05:51:01.678 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:01.685 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:01.691 else:
2025-07-01 05:51:01.699 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:01.708 eqi = None
2025-07-01 05:51:01.716
2025-07-01 05:51:01.722 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:01.733 # identical
2025-07-01 05:51:01.743
2025-07-01 05:51:01.752 # pump out diffs from before the synch point
2025-07-01 05:51:01.766 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:01.778
2025-07-01 05:51:01.787 # do intraline marking on the synch pair
2025-07-01 05:51:01.800 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:01.809 if eqi is None:
2025-07-01 05:51:01.818 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:01.829 atags = btags = ""
2025-07-01 05:51:01.838 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:01.846 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:01.855 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:01.867 if tag == 'replace':
2025-07-01 05:51:01.876 atags += '^' * la
2025-07-01 05:51:01.885 btags += '^' * lb
2025-07-01 05:51:01.891 elif tag == 'delete':
2025-07-01 05:51:01.898 atags += '-' * la
2025-07-01 05:51:01.904 elif tag == 'insert':
2025-07-01 05:51:01.916 btags += '+' * lb
2025-07-01 05:51:01.924 elif tag == 'equal':
2025-07-01 05:51:01.932 atags += ' ' * la
2025-07-01 05:51:01.940 btags += ' ' * lb
2025-07-01 05:51:01.946 else:
2025-07-01 05:51:01.960 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:01.971 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:01.982 else:
2025-07-01 05:51:01.992 # the synch pair is identical
2025-07-01 05:51:02.001 yield ' ' + aelt
2025-07-01 05:51:02.009
2025-07-01 05:51:02.016 # pump out diffs from after the synch point
2025-07-01 05:51:02.022 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:02.033
2025-07-01 05:51:02.046 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:02.056 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:02.064
2025-07-01 05:51:02.070 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:02.077 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:02.084 alo = 478, ahi = 1101
2025-07-01 05:51:02.091 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:02.097 blo = 478, bhi = 1101
2025-07-01 05:51:02.102
2025-07-01 05:51:02.107 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:02.112 g = []
2025-07-01 05:51:02.116 if alo < ahi:
2025-07-01 05:51:02.121 if blo < bhi:
2025-07-01 05:51:02.126 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:02.141 else:
2025-07-01 05:51:02.154 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:02.163 elif blo < bhi:
2025-07-01 05:51:02.171 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:02.177
2025-07-01 05:51:02.182 > yield from g
2025-07-01 05:51:02.189
2025-07-01 05:51:02.194 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:02.199 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:02.205
2025-07-01 05:51:02.212 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:02.218 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:02.225 alo = 478, ahi = 1101
2025-07-01 05:51:02.233 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:02.239 blo = 478, bhi = 1101
2025-07-01 05:51:02.245
2025-07-01 05:51:02.255 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:02.263 r"""
2025-07-01 05:51:02.269 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:02.275 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:02.283 synch point, and intraline difference marking is done on the
2025-07-01 05:51:02.294 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:02.304
2025-07-01 05:51:02.312 Example:
2025-07-01 05:51:02.319
2025-07-01 05:51:02.327 >>> d = Differ()
2025-07-01 05:51:02.339 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:02.349 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:02.357 >>> print(''.join(results), end="")
2025-07-01 05:51:02.366 - abcDefghiJkl
2025-07-01 05:51:02.387 + abcdefGhijkl
2025-07-01 05:51:02.415 """
2025-07-01 05:51:02.424
2025-07-01 05:51:02.430 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:02.440 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:02.450 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:02.458 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:02.464 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:02.470
2025-07-01 05:51:02.475 # search for the pair that matches best without being identical
2025-07-01 05:51:02.480 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:02.487 # on junk -- unless we have to)
2025-07-01 05:51:02.498 for j in range(blo, bhi):
2025-07-01 05:51:02.507 bj = b[j]
2025-07-01 05:51:02.513 cruncher.set_seq2(bj)
2025-07-01 05:51:02.519 for i in range(alo, ahi):
2025-07-01 05:51:02.524 ai = a[i]
2025-07-01 05:51:02.534 if ai == bj:
2025-07-01 05:51:02.540 if eqi is None:
2025-07-01 05:51:02.544 eqi, eqj = i, j
2025-07-01 05:51:02.549 continue
2025-07-01 05:51:02.554 cruncher.set_seq1(ai)
2025-07-01 05:51:02.559 # computing similarity is expensive, so use the quick
2025-07-01 05:51:02.564 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:02.568 # compares by a factor of 3.
2025-07-01 05:51:02.573 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:02.577 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:02.582 # of the computation is cached by cruncher
2025-07-01 05:51:02.586 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:02.591 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:02.595 cruncher.ratio() > best_ratio:
2025-07-01 05:51:02.600 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:02.605 if best_ratio < cutoff:
2025-07-01 05:51:02.611 # no non-identical "pretty close" pair
2025-07-01 05:51:02.620 if eqi is None:
2025-07-01 05:51:02.633 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:02.642 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:02.650 return
2025-07-01 05:51:02.656 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:02.664 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:02.671 else:
2025-07-01 05:51:02.684 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:02.698 eqi = None
2025-07-01 05:51:02.707
2025-07-01 05:51:02.715 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:02.722 # identical
2025-07-01 05:51:02.728
2025-07-01 05:51:02.735 # pump out diffs from before the synch point
2025-07-01 05:51:02.743 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:02.753
2025-07-01 05:51:02.761 # do intraline marking on the synch pair
2025-07-01 05:51:02.768 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:02.774 if eqi is None:
2025-07-01 05:51:02.779 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:02.785 atags = btags = ""
2025-07-01 05:51:02.791 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:02.798 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:02.809 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:02.818 if tag == 'replace':
2025-07-01 05:51:02.824 atags += '^' * la
2025-07-01 05:51:02.830 btags += '^' * lb
2025-07-01 05:51:02.836 elif tag == 'delete':
2025-07-01 05:51:02.841 atags += '-' * la
2025-07-01 05:51:02.847 elif tag == 'insert':
2025-07-01 05:51:02.853 btags += '+' * lb
2025-07-01 05:51:02.859 elif tag == 'equal':
2025-07-01 05:51:02.868 atags += ' ' * la
2025-07-01 05:51:02.877 btags += ' ' * lb
2025-07-01 05:51:02.884 else:
2025-07-01 05:51:02.890 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:02.896 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:02.902 else:
2025-07-01 05:51:02.907 # the synch pair is identical
2025-07-01 05:51:02.914 yield ' ' + aelt
2025-07-01 05:51:02.922
2025-07-01 05:51:02.934 # pump out diffs from after the synch point
2025-07-01 05:51:02.941 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:02.947
2025-07-01 05:51:02.953 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:02.965 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:02.976
2025-07-01 05:51:02.984 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:02.990 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:02.995 alo = 479, ahi = 1101
2025-07-01 05:51:03.001 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:03.007 blo = 479, bhi = 1101
2025-07-01 05:51:03.014
2025-07-01 05:51:03.025 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:03.035 g = []
2025-07-01 05:51:03.048 if alo < ahi:
2025-07-01 05:51:03.059 if blo < bhi:
2025-07-01 05:51:03.068 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:03.076 else:
2025-07-01 05:51:03.088 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:03.096 elif blo < bhi:
2025-07-01 05:51:03.104 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:03.110
2025-07-01 05:51:03.116 > yield from g
2025-07-01 05:51:03.121
2025-07-01 05:51:03.127 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:03.134 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:03.140
2025-07-01 05:51:03.147 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:03.155 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:03.162 alo = 479, ahi = 1101
2025-07-01 05:51:03.171 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:03.181 blo = 479, bhi = 1101
2025-07-01 05:51:03.190
2025-07-01 05:51:03.196 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:03.201 r"""
2025-07-01 05:51:03.207 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:03.213 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:03.218 synch point, and intraline difference marking is done on the
2025-07-01 05:51:03.228 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:03.238
2025-07-01 05:51:03.246 Example:
2025-07-01 05:51:03.251
2025-07-01 05:51:03.257 >>> d = Differ()
2025-07-01 05:51:03.262 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:03.267 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:03.273 >>> print(''.join(results), end="")
2025-07-01 05:51:03.278 - abcDefghiJkl
2025-07-01 05:51:03.297 + abcdefGhijkl
2025-07-01 05:51:03.311 """
2025-07-01 05:51:03.321
2025-07-01 05:51:03.328 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:03.337 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:03.350 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:03.362 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:03.370 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:03.376
2025-07-01 05:51:03.382 # search for the pair that matches best without being identical
2025-07-01 05:51:03.393 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:03.402 # on junk -- unless we have to)
2025-07-01 05:51:03.409 for j in range(blo, bhi):
2025-07-01 05:51:03.415 bj = b[j]
2025-07-01 05:51:03.421 cruncher.set_seq2(bj)
2025-07-01 05:51:03.426 for i in range(alo, ahi):
2025-07-01 05:51:03.436 ai = a[i]
2025-07-01 05:51:03.444 if ai == bj:
2025-07-01 05:51:03.452 if eqi is None:
2025-07-01 05:51:03.458 eqi, eqj = i, j
2025-07-01 05:51:03.467 continue
2025-07-01 05:51:03.474 cruncher.set_seq1(ai)
2025-07-01 05:51:03.481 # computing similarity is expensive, so use the quick
2025-07-01 05:51:03.489 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:03.496 # compares by a factor of 3.
2025-07-01 05:51:03.502 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:03.507 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:03.513 # of the computation is cached by cruncher
2025-07-01 05:51:03.519 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:03.528 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:03.536 cruncher.ratio() > best_ratio:
2025-07-01 05:51:03.542 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:03.548 if best_ratio < cutoff:
2025-07-01 05:51:03.553 # no non-identical "pretty close" pair
2025-07-01 05:51:03.559 if eqi is None:
2025-07-01 05:51:03.566 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:03.574 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:03.582 return
2025-07-01 05:51:03.588 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:03.594 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:03.599 else:
2025-07-01 05:51:03.605 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:03.611 eqi = None
2025-07-01 05:51:03.616
2025-07-01 05:51:03.622 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:03.628 # identical
2025-07-01 05:51:03.633
2025-07-01 05:51:03.639 # pump out diffs from before the synch point
2025-07-01 05:51:03.645 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:03.650
2025-07-01 05:51:03.656 # do intraline marking on the synch pair
2025-07-01 05:51:03.662 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:03.668 if eqi is None:
2025-07-01 05:51:03.675 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:03.686 atags = btags = ""
2025-07-01 05:51:03.694 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:03.701 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:03.707 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:03.713 if tag == 'replace':
2025-07-01 05:51:03.720 atags += '^' * la
2025-07-01 05:51:03.726 btags += '^' * lb
2025-07-01 05:51:03.732 elif tag == 'delete':
2025-07-01 05:51:03.738 atags += '-' * la
2025-07-01 05:51:03.744 elif tag == 'insert':
2025-07-01 05:51:03.751 btags += '+' * lb
2025-07-01 05:51:03.761 elif tag == 'equal':
2025-07-01 05:51:03.770 atags += ' ' * la
2025-07-01 05:51:03.776 btags += ' ' * lb
2025-07-01 05:51:03.784 else:
2025-07-01 05:51:03.790 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:03.800 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:03.809 else:
2025-07-01 05:51:03.816 # the synch pair is identical
2025-07-01 05:51:03.823 yield ' ' + aelt
2025-07-01 05:51:03.828
2025-07-01 05:51:03.834 # pump out diffs from after the synch point
2025-07-01 05:51:03.844 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:03.855
2025-07-01 05:51:03.864 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:03.877 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:03.887
2025-07-01 05:51:03.896 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:03.904 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:03.910 alo = 480, ahi = 1101
2025-07-01 05:51:03.923 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:03.933 blo = 480, bhi = 1101
2025-07-01 05:51:03.944
2025-07-01 05:51:03.951 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:03.957 g = []
2025-07-01 05:51:03.964 if alo < ahi:
2025-07-01 05:51:03.970 if blo < bhi:
2025-07-01 05:51:03.983 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:03.994 else:
2025-07-01 05:51:04.005 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:04.017 elif blo < bhi:
2025-07-01 05:51:04.028 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:04.034
2025-07-01 05:51:04.039 > yield from g
2025-07-01 05:51:04.044
2025-07-01 05:51:04.049 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:04.054 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:04.065
2025-07-01 05:51:04.073 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:04.083 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:04.093 alo = 480, ahi = 1101
2025-07-01 05:51:04.100 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:04.106 blo = 480, bhi = 1101
2025-07-01 05:51:04.112
2025-07-01 05:51:04.117 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:04.124 r"""
2025-07-01 05:51:04.131 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:04.137 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:04.147 synch point, and intraline difference marking is done on the
2025-07-01 05:51:04.158 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:04.167
2025-07-01 05:51:04.173 Example:
2025-07-01 05:51:04.178
2025-07-01 05:51:04.184 >>> d = Differ()
2025-07-01 05:51:04.189 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:04.195 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:04.201 >>> print(''.join(results), end="")
2025-07-01 05:51:04.207 - abcDefghiJkl
2025-07-01 05:51:04.224 + abcdefGhijkl
2025-07-01 05:51:04.241 """
2025-07-01 05:51:04.247
2025-07-01 05:51:04.253 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:04.258 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:04.270 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:04.278 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:04.288 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:04.295
2025-07-01 05:51:04.302 # search for the pair that matches best without being identical
2025-07-01 05:51:04.311 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:04.317 # on junk -- unless we have to)
2025-07-01 05:51:04.323 for j in range(blo, bhi):
2025-07-01 05:51:04.327 bj = b[j]
2025-07-01 05:51:04.333 cruncher.set_seq2(bj)
2025-07-01 05:51:04.345 for i in range(alo, ahi):
2025-07-01 05:51:04.355 ai = a[i]
2025-07-01 05:51:04.363 if ai == bj:
2025-07-01 05:51:04.370 if eqi is None:
2025-07-01 05:51:04.381 eqi, eqj = i, j
2025-07-01 05:51:04.390 continue
2025-07-01 05:51:04.402 cruncher.set_seq1(ai)
2025-07-01 05:51:04.415 # computing similarity is expensive, so use the quick
2025-07-01 05:51:04.425 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:04.432 # compares by a factor of 3.
2025-07-01 05:51:04.442 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:04.455 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:04.464 # of the computation is cached by cruncher
2025-07-01 05:51:04.473 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:04.480 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:04.494 cruncher.ratio() > best_ratio:
2025-07-01 05:51:04.504 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:04.512 if best_ratio < cutoff:
2025-07-01 05:51:04.519 # no non-identical "pretty close" pair
2025-07-01 05:51:04.528 if eqi is None:
2025-07-01 05:51:04.538 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:04.545 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:04.551 return
2025-07-01 05:51:04.562 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:04.574 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:04.582 else:
2025-07-01 05:51:04.593 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:04.605 eqi = None
2025-07-01 05:51:04.615
2025-07-01 05:51:04.626 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:04.637 # identical
2025-07-01 05:51:04.643
2025-07-01 05:51:04.649 # pump out diffs from before the synch point
2025-07-01 05:51:04.654 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:04.658
2025-07-01 05:51:04.663 # do intraline marking on the synch pair
2025-07-01 05:51:04.668 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:04.672 if eqi is None:
2025-07-01 05:51:04.677 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:04.682 atags = btags = ""
2025-07-01 05:51:04.687 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:04.692 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:04.701 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:04.705 if tag == 'replace':
2025-07-01 05:51:04.710 atags += '^' * la
2025-07-01 05:51:04.715 btags += '^' * lb
2025-07-01 05:51:04.720 elif tag == 'delete':
2025-07-01 05:51:04.725 atags += '-' * la
2025-07-01 05:51:04.733 elif tag == 'insert':
2025-07-01 05:51:04.739 btags += '+' * lb
2025-07-01 05:51:04.745 elif tag == 'equal':
2025-07-01 05:51:04.751 atags += ' ' * la
2025-07-01 05:51:04.758 btags += ' ' * lb
2025-07-01 05:51:04.764 else:
2025-07-01 05:51:04.771 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:04.778 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:04.784 else:
2025-07-01 05:51:04.791 # the synch pair is identical
2025-07-01 05:51:04.798 yield ' ' + aelt
2025-07-01 05:51:04.805
2025-07-01 05:51:04.816 # pump out diffs from after the synch point
2025-07-01 05:51:04.824 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:04.832
2025-07-01 05:51:04.839 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:04.845 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:04.857
2025-07-01 05:51:04.869 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:04.879 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:04.889 alo = 481, ahi = 1101
2025-07-01 05:51:04.899 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:04.907 blo = 481, bhi = 1101
2025-07-01 05:51:04.914
2025-07-01 05:51:04.921 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:04.926 g = []
2025-07-01 05:51:04.936 if alo < ahi:
2025-07-01 05:51:04.944 if blo < bhi:
2025-07-01 05:51:04.951 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:04.958 else:
2025-07-01 05:51:04.964 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:04.969 elif blo < bhi:
2025-07-01 05:51:04.981 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:04.993
2025-07-01 05:51:05.002 > yield from g
2025-07-01 05:51:05.010
2025-07-01 05:51:05.016 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:05.023 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:05.028
2025-07-01 05:51:05.034 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:05.041 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:05.046 alo = 481, ahi = 1101
2025-07-01 05:51:05.052 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:05.057 blo = 481, bhi = 1101
2025-07-01 05:51:05.064
2025-07-01 05:51:05.071 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:05.079 r"""
2025-07-01 05:51:05.087 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:05.095 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:05.101 synch point, and intraline difference marking is done on the
2025-07-01 05:51:05.107 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:05.114
2025-07-01 05:51:05.120 Example:
2025-07-01 05:51:05.125
2025-07-01 05:51:05.130 >>> d = Differ()
2025-07-01 05:51:05.137 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:05.144 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:05.150 >>> print(''.join(results), end="")
2025-07-01 05:51:05.160 - abcDefghiJkl
2025-07-01 05:51:05.177 + abcdefGhijkl
2025-07-01 05:51:05.200 """
2025-07-01 05:51:05.208
2025-07-01 05:51:05.213 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:05.219 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:05.229 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:05.238 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:05.247 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:05.254
2025-07-01 05:51:05.261 # search for the pair that matches best without being identical
2025-07-01 05:51:05.268 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:05.275 # on junk -- unless we have to)
2025-07-01 05:51:05.282 for j in range(blo, bhi):
2025-07-01 05:51:05.295 bj = b[j]
2025-07-01 05:51:05.302 cruncher.set_seq2(bj)
2025-07-01 05:51:05.310 for i in range(alo, ahi):
2025-07-01 05:51:05.317 ai = a[i]
2025-07-01 05:51:05.324 if ai == bj:
2025-07-01 05:51:05.330 if eqi is None:
2025-07-01 05:51:05.344 eqi, eqj = i, j
2025-07-01 05:51:05.357 continue
2025-07-01 05:51:05.369 cruncher.set_seq1(ai)
2025-07-01 05:51:05.380 # computing similarity is expensive, so use the quick
2025-07-01 05:51:05.389 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:05.396 # compares by a factor of 3.
2025-07-01 05:51:05.406 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:05.418 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:05.429 # of the computation is cached by cruncher
2025-07-01 05:51:05.443 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:05.452 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:05.460 cruncher.ratio() > best_ratio:
2025-07-01 05:51:05.467 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:05.474 if best_ratio < cutoff:
2025-07-01 05:51:05.486 # no non-identical "pretty close" pair
2025-07-01 05:51:05.498 if eqi is None:
2025-07-01 05:51:05.508 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:05.522 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:05.533 return
2025-07-01 05:51:05.545 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:05.555 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:05.562 else:
2025-07-01 05:51:05.568 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:05.579 eqi = None
2025-07-01 05:51:05.587
2025-07-01 05:51:05.599 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:05.608 # identical
2025-07-01 05:51:05.617
2025-07-01 05:51:05.624 # pump out diffs from before the synch point
2025-07-01 05:51:05.631 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:05.637
2025-07-01 05:51:05.642 # do intraline marking on the synch pair
2025-07-01 05:51:05.652 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:05.665 if eqi is None:
2025-07-01 05:51:05.675 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:05.683 atags = btags = ""
2025-07-01 05:51:05.690 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:05.702 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:05.713 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:05.723 if tag == 'replace':
2025-07-01 05:51:05.730 atags += '^' * la
2025-07-01 05:51:05.743 btags += '^' * lb
2025-07-01 05:51:05.755 elif tag == 'delete':
2025-07-01 05:51:05.765 atags += '-' * la
2025-07-01 05:51:05.777 elif tag == 'insert':
2025-07-01 05:51:05.787 btags += '+' * lb
2025-07-01 05:51:05.796 elif tag == 'equal':
2025-07-01 05:51:05.803 atags += ' ' * la
2025-07-01 05:51:05.810 btags += ' ' * lb
2025-07-01 05:51:05.816 else:
2025-07-01 05:51:05.826 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:05.834 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:05.842 else:
2025-07-01 05:51:05.849 # the synch pair is identical
2025-07-01 05:51:05.855 yield ' ' + aelt
2025-07-01 05:51:05.861
2025-07-01 05:51:05.866 # pump out diffs from after the synch point
2025-07-01 05:51:05.871 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:05.875
2025-07-01 05:51:05.879 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:05.884 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:05.888
2025-07-01 05:51:05.892 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:05.897 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:05.901 alo = 482, ahi = 1101
2025-07-01 05:51:05.906 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:05.911 blo = 482, bhi = 1101
2025-07-01 05:51:05.917
2025-07-01 05:51:05.930 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:05.939 g = []
2025-07-01 05:51:05.951 if alo < ahi:
2025-07-01 05:51:05.959 if blo < bhi:
2025-07-01 05:51:05.967 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:05.975 else:
2025-07-01 05:51:05.983 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:05.993 elif blo < bhi:
2025-07-01 05:51:05.999 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:06.005
2025-07-01 05:51:06.012 > yield from g
2025-07-01 05:51:06.018
2025-07-01 05:51:06.026 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:06.036 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:06.046
2025-07-01 05:51:06.052 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:06.058 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:06.065 alo = 482, ahi = 1101
2025-07-01 05:51:06.075 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:06.083 blo = 482, bhi = 1101
2025-07-01 05:51:06.090
2025-07-01 05:51:06.096 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:06.103 r"""
2025-07-01 05:51:06.110 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:06.120 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:06.131 synch point, and intraline difference marking is done on the
2025-07-01 05:51:06.139 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:06.145
2025-07-01 05:51:06.151 Example:
2025-07-01 05:51:06.157
2025-07-01 05:51:06.163 >>> d = Differ()
2025-07-01 05:51:06.172 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:06.183 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:06.191 >>> print(''.join(results), end="")
2025-07-01 05:51:06.198 - abcDefghiJkl
2025-07-01 05:51:06.210 + abcdefGhijkl
2025-07-01 05:51:06.223 """
2025-07-01 05:51:06.233
2025-07-01 05:51:06.242 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:06.249 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:06.255 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:06.261 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:06.266 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:06.270
2025-07-01 05:51:06.276 # search for the pair that matches best without being identical
2025-07-01 05:51:06.281 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:06.287 # on junk -- unless we have to)
2025-07-01 05:51:06.296 for j in range(blo, bhi):
2025-07-01 05:51:06.306 bj = b[j]
2025-07-01 05:51:06.317 cruncher.set_seq2(bj)
2025-07-01 05:51:06.323 for i in range(alo, ahi):
2025-07-01 05:51:06.335 ai = a[i]
2025-07-01 05:51:06.346 if ai == bj:
2025-07-01 05:51:06.356 if eqi is None:
2025-07-01 05:51:06.366 eqi, eqj = i, j
2025-07-01 05:51:06.375 continue
2025-07-01 05:51:06.383 cruncher.set_seq1(ai)
2025-07-01 05:51:06.392 # computing similarity is expensive, so use the quick
2025-07-01 05:51:06.402 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:06.410 # compares by a factor of 3.
2025-07-01 05:51:06.416 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:06.422 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:06.428 # of the computation is cached by cruncher
2025-07-01 05:51:06.435 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:06.442 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:06.451 cruncher.ratio() > best_ratio:
2025-07-01 05:51:06.457 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:06.462 if best_ratio < cutoff:
2025-07-01 05:51:06.470 # no non-identical "pretty close" pair
2025-07-01 05:51:06.477 if eqi is None:
2025-07-01 05:51:06.483 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:06.488 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:06.493 return
2025-07-01 05:51:06.498 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:06.507 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:06.515 else:
2025-07-01 05:51:06.523 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:06.533 eqi = None
2025-07-01 05:51:06.544
2025-07-01 05:51:06.554 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:06.562 # identical
2025-07-01 05:51:06.569
2025-07-01 05:51:06.575 # pump out diffs from before the synch point
2025-07-01 05:51:06.583 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:06.594
2025-07-01 05:51:06.601 # do intraline marking on the synch pair
2025-07-01 05:51:06.609 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:06.615 if eqi is None:
2025-07-01 05:51:06.623 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:06.630 atags = btags = ""
2025-07-01 05:51:06.641 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:06.652 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:06.661 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:06.668 if tag == 'replace':
2025-07-01 05:51:06.674 atags += '^' * la
2025-07-01 05:51:06.683 btags += '^' * lb
2025-07-01 05:51:06.693 elif tag == 'delete':
2025-07-01 05:51:06.700 atags += '-' * la
2025-07-01 05:51:06.706 elif tag == 'insert':
2025-07-01 05:51:06.716 btags += '+' * lb
2025-07-01 05:51:06.725 elif tag == 'equal':
2025-07-01 05:51:06.731 atags += ' ' * la
2025-07-01 05:51:06.736 btags += ' ' * lb
2025-07-01 05:51:06.741 else:
2025-07-01 05:51:06.746 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:06.751 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:06.756 else:
2025-07-01 05:51:06.761 # the synch pair is identical
2025-07-01 05:51:06.767 yield ' ' + aelt
2025-07-01 05:51:06.775
2025-07-01 05:51:06.785 # pump out diffs from after the synch point
2025-07-01 05:51:06.792 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:06.798
2025-07-01 05:51:06.804 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:06.809 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:06.814
2025-07-01 05:51:06.820 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:06.827 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:06.833 alo = 483, ahi = 1101
2025-07-01 05:51:06.843 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:06.852 blo = 483, bhi = 1101
2025-07-01 05:51:06.859
2025-07-01 05:51:06.864 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:06.870 g = []
2025-07-01 05:51:06.879 if alo < ahi:
2025-07-01 05:51:06.887 if blo < bhi:
2025-07-01 05:51:06.893 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:06.899 else:
2025-07-01 05:51:06.906 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:06.914 elif blo < bhi:
2025-07-01 05:51:06.924 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:06.931
2025-07-01 05:51:06.939 > yield from g
2025-07-01 05:51:06.944
2025-07-01 05:51:06.951 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:06.957 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:06.962
2025-07-01 05:51:06.968 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:06.981 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:06.991 alo = 483, ahi = 1101
2025-07-01 05:51:06.999 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:07.007 blo = 483, bhi = 1101
2025-07-01 05:51:07.013
2025-07-01 05:51:07.019 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:07.024 r"""
2025-07-01 05:51:07.030 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:07.036 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:07.049 synch point, and intraline difference marking is done on the
2025-07-01 05:51:07.060 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:07.069
2025-07-01 05:51:07.081 Example:
2025-07-01 05:51:07.090
2025-07-01 05:51:07.097 >>> d = Differ()
2025-07-01 05:51:07.103 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:07.108 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:07.113 >>> print(''.join(results), end="")
2025-07-01 05:51:07.119 - abcDefghiJkl
2025-07-01 05:51:07.135 + abcdefGhijkl
2025-07-01 05:51:07.147 """
2025-07-01 05:51:07.154
2025-07-01 05:51:07.162 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:07.173 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:07.183 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:07.191 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:07.198 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:07.204
2025-07-01 05:51:07.211 # search for the pair that matches best without being identical
2025-07-01 05:51:07.224 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:07.236 # on junk -- unless we have to)
2025-07-01 05:51:07.247 for j in range(blo, bhi):
2025-07-01 05:51:07.254 bj = b[j]
2025-07-01 05:51:07.261 cruncher.set_seq2(bj)
2025-07-01 05:51:07.267 for i in range(alo, ahi):
2025-07-01 05:51:07.273 ai = a[i]
2025-07-01 05:51:07.279 if ai == bj:
2025-07-01 05:51:07.285 if eqi is None:
2025-07-01 05:51:07.291 eqi, eqj = i, j
2025-07-01 05:51:07.296 continue
2025-07-01 05:51:07.302 cruncher.set_seq1(ai)
2025-07-01 05:51:07.313 # computing similarity is expensive, so use the quick
2025-07-01 05:51:07.322 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:07.329 # compares by a factor of 3.
2025-07-01 05:51:07.336 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:07.342 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:07.352 # of the computation is cached by cruncher
2025-07-01 05:51:07.360 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:07.367 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:07.375 cruncher.ratio() > best_ratio:
2025-07-01 05:51:07.384 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:07.396 if best_ratio < cutoff:
2025-07-01 05:51:07.405 # no non-identical "pretty close" pair
2025-07-01 05:51:07.412 if eqi is None:
2025-07-01 05:51:07.419 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:07.424 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:07.430 return
2025-07-01 05:51:07.436 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:07.444 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:07.450 else:
2025-07-01 05:51:07.456 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:07.463 eqi = None
2025-07-01 05:51:07.473
2025-07-01 05:51:07.482 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:07.489 # identical
2025-07-01 05:51:07.494
2025-07-01 05:51:07.499 # pump out diffs from before the synch point
2025-07-01 05:51:07.504 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:07.509
2025-07-01 05:51:07.514 # do intraline marking on the synch pair
2025-07-01 05:51:07.519 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:07.524 if eqi is None:
2025-07-01 05:51:07.529 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:07.534 atags = btags = ""
2025-07-01 05:51:07.539 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:07.543 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:07.548 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:07.552 if tag == 'replace':
2025-07-01 05:51:07.557 atags += '^' * la
2025-07-01 05:51:07.561 btags += '^' * lb
2025-07-01 05:51:07.566 elif tag == 'delete':
2025-07-01 05:51:07.570 atags += '-' * la
2025-07-01 05:51:07.575 elif tag == 'insert':
2025-07-01 05:51:07.579 btags += '+' * lb
2025-07-01 05:51:07.585 elif tag == 'equal':
2025-07-01 05:51:07.591 atags += ' ' * la
2025-07-01 05:51:07.597 btags += ' ' * lb
2025-07-01 05:51:07.603 else:
2025-07-01 05:51:07.609 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:07.615 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:07.628 else:
2025-07-01 05:51:07.637 # the synch pair is identical
2025-07-01 05:51:07.650 yield ' ' + aelt
2025-07-01 05:51:07.659
2025-07-01 05:51:07.667 # pump out diffs from after the synch point
2025-07-01 05:51:07.675 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:07.682
2025-07-01 05:51:07.692 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:07.700 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:07.707
2025-07-01 05:51:07.715 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:07.725 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:07.733 alo = 484, ahi = 1101
2025-07-01 05:51:07.742 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:07.750 blo = 484, bhi = 1101
2025-07-01 05:51:07.759
2025-07-01 05:51:07.770 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:07.779 g = []
2025-07-01 05:51:07.790 if alo < ahi:
2025-07-01 05:51:07.798 if blo < bhi:
2025-07-01 05:51:07.807 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:07.817 else:
2025-07-01 05:51:07.829 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:07.841 elif blo < bhi:
2025-07-01 05:51:07.851 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:07.857
2025-07-01 05:51:07.863 > yield from g
2025-07-01 05:51:07.869
2025-07-01 05:51:07.874 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:07.879 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:07.883
2025-07-01 05:51:07.888 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:07.895 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:07.900 alo = 484, ahi = 1101
2025-07-01 05:51:07.906 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:07.912 blo = 484, bhi = 1101
2025-07-01 05:51:07.918
2025-07-01 05:51:07.924 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:07.931 r"""
2025-07-01 05:51:07.944 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:07.952 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:07.959 synch point, and intraline difference marking is done on the
2025-07-01 05:51:07.966 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:07.971
2025-07-01 05:51:07.977 Example:
2025-07-01 05:51:07.982
2025-07-01 05:51:07.992 >>> d = Differ()
2025-07-01 05:51:08.001 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:08.008 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:08.014 >>> print(''.join(results), end="")
2025-07-01 05:51:08.022 - abcDefghiJkl
2025-07-01 05:51:08.034 + abcdefGhijkl
2025-07-01 05:51:08.044 """
2025-07-01 05:51:08.053
2025-07-01 05:51:08.065 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:08.077 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:08.087 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:08.096 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:08.103 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:08.111
2025-07-01 05:51:08.121 # search for the pair that matches best without being identical
2025-07-01 05:51:08.134 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:08.144 # on junk -- unless we have to)
2025-07-01 05:51:08.152 for j in range(blo, bhi):
2025-07-01 05:51:08.160 bj = b[j]
2025-07-01 05:51:08.166 cruncher.set_seq2(bj)
2025-07-01 05:51:08.178 for i in range(alo, ahi):
2025-07-01 05:51:08.187 ai = a[i]
2025-07-01 05:51:08.199 if ai == bj:
2025-07-01 05:51:08.209 if eqi is None:
2025-07-01 05:51:08.221 eqi, eqj = i, j
2025-07-01 05:51:08.232 continue
2025-07-01 05:51:08.239 cruncher.set_seq1(ai)
2025-07-01 05:51:08.247 # computing similarity is expensive, so use the quick
2025-07-01 05:51:08.259 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:08.267 # compares by a factor of 3.
2025-07-01 05:51:08.275 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:08.281 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:08.293 # of the computation is cached by cruncher
2025-07-01 05:51:08.304 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:08.313 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:08.322 cruncher.ratio() > best_ratio:
2025-07-01 05:51:08.331 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:08.342 if best_ratio < cutoff:
2025-07-01 05:51:08.351 # no non-identical "pretty close" pair
2025-07-01 05:51:08.359 if eqi is None:
2025-07-01 05:51:08.372 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:08.383 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:08.396 return
2025-07-01 05:51:08.404 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:08.410 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:08.417 else:
2025-07-01 05:51:08.424 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:08.430 eqi = None
2025-07-01 05:51:08.437
2025-07-01 05:51:08.443 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:08.448 # identical
2025-07-01 05:51:08.454
2025-07-01 05:51:08.461 # pump out diffs from before the synch point
2025-07-01 05:51:08.466 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:08.471
2025-07-01 05:51:08.476 # do intraline marking on the synch pair
2025-07-01 05:51:08.481 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:08.489 if eqi is None:
2025-07-01 05:51:08.494 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:08.502 atags = btags = ""
2025-07-01 05:51:08.510 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:08.519 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:08.527 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:08.535 if tag == 'replace':
2025-07-01 05:51:08.546 atags += '^' * la
2025-07-01 05:51:08.555 btags += '^' * lb
2025-07-01 05:51:08.563 elif tag == 'delete':
2025-07-01 05:51:08.574 atags += '-' * la
2025-07-01 05:51:08.583 elif tag == 'insert':
2025-07-01 05:51:08.590 btags += '+' * lb
2025-07-01 05:51:08.595 elif tag == 'equal':
2025-07-01 05:51:08.600 atags += ' ' * la
2025-07-01 05:51:08.610 btags += ' ' * lb
2025-07-01 05:51:08.618 else:
2025-07-01 05:51:08.625 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:08.634 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:08.640 else:
2025-07-01 05:51:08.647 # the synch pair is identical
2025-07-01 05:51:08.653 yield ' ' + aelt
2025-07-01 05:51:08.658
2025-07-01 05:51:08.670 # pump out diffs from after the synch point
2025-07-01 05:51:08.682 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:08.691
2025-07-01 05:51:08.698 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:08.704 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:08.711
2025-07-01 05:51:08.725 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:08.736 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:08.745 alo = 485, ahi = 1101
2025-07-01 05:51:08.752 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:08.758 blo = 485, bhi = 1101
2025-07-01 05:51:08.764
2025-07-01 05:51:08.770 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:08.775 g = []
2025-07-01 05:51:08.781 if alo < ahi:
2025-07-01 05:51:08.786 if blo < bhi:
2025-07-01 05:51:08.797 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:08.807 else:
2025-07-01 05:51:08.815 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:08.821 elif blo < bhi:
2025-07-01 05:51:08.827 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:08.833
2025-07-01 05:51:08.839 > yield from g
2025-07-01 05:51:08.851
2025-07-01 05:51:08.865 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:08.876 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:08.883
2025-07-01 05:51:08.889 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:08.896 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:08.903 alo = 485, ahi = 1101
2025-07-01 05:51:08.911 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:08.919 blo = 485, bhi = 1101
2025-07-01 05:51:08.929
2025-07-01 05:51:08.938 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:08.944 r"""
2025-07-01 05:51:08.950 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:08.960 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:08.970 synch point, and intraline difference marking is done on the
2025-07-01 05:51:08.980 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:08.991
2025-07-01 05:51:09.000 Example:
2025-07-01 05:51:09.008
2025-07-01 05:51:09.016 >>> d = Differ()
2025-07-01 05:51:09.024 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:09.032 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:09.039 >>> print(''.join(results), end="")
2025-07-01 05:51:09.045 - abcDefghiJkl
2025-07-01 05:51:09.057 + abcdefGhijkl
2025-07-01 05:51:09.071 """
2025-07-01 05:51:09.082
2025-07-01 05:51:09.092 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:09.102 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:09.112 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:09.121 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:09.133 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:09.140
2025-07-01 05:51:09.148 # search for the pair that matches best without being identical
2025-07-01 05:51:09.157 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:09.164 # on junk -- unless we have to)
2025-07-01 05:51:09.171 for j in range(blo, bhi):
2025-07-01 05:51:09.178 bj = b[j]
2025-07-01 05:51:09.187 cruncher.set_seq2(bj)
2025-07-01 05:51:09.200 for i in range(alo, ahi):
2025-07-01 05:51:09.210 ai = a[i]
2025-07-01 05:51:09.219 if ai == bj:
2025-07-01 05:51:09.227 if eqi is None:
2025-07-01 05:51:09.235 eqi, eqj = i, j
2025-07-01 05:51:09.242 continue
2025-07-01 05:51:09.250 cruncher.set_seq1(ai)
2025-07-01 05:51:09.258 # computing similarity is expensive, so use the quick
2025-07-01 05:51:09.265 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:09.272 # compares by a factor of 3.
2025-07-01 05:51:09.279 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:09.286 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:09.295 # of the computation is cached by cruncher
2025-07-01 05:51:09.308 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:09.324 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:09.335 cruncher.ratio() > best_ratio:
2025-07-01 05:51:09.344 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:09.351 if best_ratio < cutoff:
2025-07-01 05:51:09.360 # no non-identical "pretty close" pair
2025-07-01 05:51:09.373 if eqi is None:
2025-07-01 05:51:09.382 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:09.390 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:09.398 return
2025-07-01 05:51:09.404 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:09.410 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:09.420 else:
2025-07-01 05:51:09.428 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:09.435 eqi = None
2025-07-01 05:51:09.443
2025-07-01 05:51:09.452 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:09.459 # identical
2025-07-01 05:51:09.465
2025-07-01 05:51:09.472 # pump out diffs from before the synch point
2025-07-01 05:51:09.478 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:09.484
2025-07-01 05:51:09.491 # do intraline marking on the synch pair
2025-07-01 05:51:09.497 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:09.503 if eqi is None:
2025-07-01 05:51:09.509 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:09.515 atags = btags = ""
2025-07-01 05:51:09.521 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:09.527 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:09.534 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:09.544 if tag == 'replace':
2025-07-01 05:51:09.554 atags += '^' * la
2025-07-01 05:51:09.565 btags += '^' * lb
2025-07-01 05:51:09.573 elif tag == 'delete':
2025-07-01 05:51:09.580 atags += '-' * la
2025-07-01 05:51:09.587 elif tag == 'insert':
2025-07-01 05:51:09.598 btags += '+' * lb
2025-07-01 05:51:09.609 elif tag == 'equal':
2025-07-01 05:51:09.620 atags += ' ' * la
2025-07-01 05:51:09.629 btags += ' ' * lb
2025-07-01 05:51:09.641 else:
2025-07-01 05:51:09.652 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:09.661 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:09.668 else:
2025-07-01 05:51:09.675 # the synch pair is identical
2025-07-01 05:51:09.680 yield ' ' + aelt
2025-07-01 05:51:09.687
2025-07-01 05:51:09.697 # pump out diffs from after the synch point
2025-07-01 05:51:09.706 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:09.715
2025-07-01 05:51:09.722 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:09.731 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:09.741
2025-07-01 05:51:09.749 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:09.758 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:09.770 alo = 488, ahi = 1101
2025-07-01 05:51:09.781 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:09.789 blo = 488, bhi = 1101
2025-07-01 05:51:09.796
2025-07-01 05:51:09.803 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:09.809 g = []
2025-07-01 05:51:09.815 if alo < ahi:
2025-07-01 05:51:09.822 if blo < bhi:
2025-07-01 05:51:09.832 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:09.840 else:
2025-07-01 05:51:09.847 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:09.855 elif blo < bhi:
2025-07-01 05:51:09.866 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:09.876
2025-07-01 05:51:09.883 > yield from g
2025-07-01 05:51:09.893
2025-07-01 05:51:09.905 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:09.916 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:09.924
2025-07-01 05:51:09.932 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:09.939 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:09.945 alo = 488, ahi = 1101
2025-07-01 05:51:09.956 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:09.966 blo = 488, bhi = 1101
2025-07-01 05:51:09.973
2025-07-01 05:51:09.980 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:09.986 r"""
2025-07-01 05:51:09.992 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:09.998 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:10.011 synch point, and intraline difference marking is done on the
2025-07-01 05:51:10.021 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:10.029
2025-07-01 05:51:10.035 Example:
2025-07-01 05:51:10.042
2025-07-01 05:51:10.049 >>> d = Differ()
2025-07-01 05:51:10.055 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:10.061 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:10.067 >>> print(''.join(results), end="")
2025-07-01 05:51:10.073 - abcDefghiJkl
2025-07-01 05:51:10.085 + abcdefGhijkl
2025-07-01 05:51:10.096 """
2025-07-01 05:51:10.102
2025-07-01 05:51:10.107 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:10.112 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:10.122 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:10.131 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:10.142 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:10.151
2025-07-01 05:51:10.164 # search for the pair that matches best without being identical
2025-07-01 05:51:10.173 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:10.184 # on junk -- unless we have to)
2025-07-01 05:51:10.194 for j in range(blo, bhi):
2025-07-01 05:51:10.203 bj = b[j]
2025-07-01 05:51:10.215 cruncher.set_seq2(bj)
2025-07-01 05:51:10.224 for i in range(alo, ahi):
2025-07-01 05:51:10.232 ai = a[i]
2025-07-01 05:51:10.239 if ai == bj:
2025-07-01 05:51:10.247 if eqi is None:
2025-07-01 05:51:10.258 eqi, eqj = i, j
2025-07-01 05:51:10.268 continue
2025-07-01 05:51:10.282 cruncher.set_seq1(ai)
2025-07-01 05:51:10.292 # computing similarity is expensive, so use the quick
2025-07-01 05:51:10.299 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:10.306 # compares by a factor of 3.
2025-07-01 05:51:10.312 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:10.318 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:10.329 # of the computation is cached by cruncher
2025-07-01 05:51:10.337 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:10.345 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:10.355 cruncher.ratio() > best_ratio:
2025-07-01 05:51:10.362 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:10.369 if best_ratio < cutoff:
2025-07-01 05:51:10.375 # no non-identical "pretty close" pair
2025-07-01 05:51:10.383 if eqi is None:
2025-07-01 05:51:10.393 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:10.404 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:10.415 return
2025-07-01 05:51:10.424 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:10.434 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:10.445 else:
2025-07-01 05:51:10.454 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:10.461 eqi = None
2025-07-01 05:51:10.467
2025-07-01 05:51:10.473 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:10.484 # identical
2025-07-01 05:51:10.493
2025-07-01 05:51:10.504 # pump out diffs from before the synch point
2025-07-01 05:51:10.513 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:10.525
2025-07-01 05:51:10.534 # do intraline marking on the synch pair
2025-07-01 05:51:10.547 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:10.557 if eqi is None:
2025-07-01 05:51:10.565 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:10.575 atags = btags = ""
2025-07-01 05:51:10.587 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:10.597 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:10.605 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:10.611 if tag == 'replace':
2025-07-01 05:51:10.618 atags += '^' * la
2025-07-01 05:51:10.625 btags += '^' * lb
2025-07-01 05:51:10.631 elif tag == 'delete':
2025-07-01 05:51:10.638 atags += '-' * la
2025-07-01 05:51:10.645 elif tag == 'insert':
2025-07-01 05:51:10.651 btags += '+' * lb
2025-07-01 05:51:10.660 elif tag == 'equal':
2025-07-01 05:51:10.672 atags += ' ' * la
2025-07-01 05:51:10.681 btags += ' ' * lb
2025-07-01 05:51:10.689 else:
2025-07-01 05:51:10.695 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:10.700 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:10.705 else:
2025-07-01 05:51:10.709 # the synch pair is identical
2025-07-01 05:51:10.714 yield ' ' + aelt
2025-07-01 05:51:10.729
2025-07-01 05:51:10.738 # pump out diffs from after the synch point
2025-07-01 05:51:10.746 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:10.754
2025-07-01 05:51:10.760 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:10.766 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:10.771
2025-07-01 05:51:10.777 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:10.783 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:10.789 alo = 489, ahi = 1101
2025-07-01 05:51:10.795 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:10.800 blo = 489, bhi = 1101
2025-07-01 05:51:10.806
2025-07-01 05:51:10.812 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:10.818 g = []
2025-07-01 05:51:10.824 if alo < ahi:
2025-07-01 05:51:10.830 if blo < bhi:
2025-07-01 05:51:10.835 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:10.839 else:
2025-07-01 05:51:10.844 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:10.848 elif blo < bhi:
2025-07-01 05:51:10.859 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:10.867
2025-07-01 05:51:10.875 > yield from g
2025-07-01 05:51:10.883
2025-07-01 05:51:10.890 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:10.901 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:10.912
2025-07-01 05:51:10.921 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:10.932 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:10.942 alo = 489, ahi = 1101
2025-07-01 05:51:10.950 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:10.956 blo = 489, bhi = 1101
2025-07-01 05:51:10.963
2025-07-01 05:51:10.968 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:10.974 r"""
2025-07-01 05:51:10.985 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:10.991 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:10.998 synch point, and intraline difference marking is done on the
2025-07-01 05:51:11.004 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:11.009
2025-07-01 05:51:11.014 Example:
2025-07-01 05:51:11.019
2025-07-01 05:51:11.024 >>> d = Differ()
2025-07-01 05:51:11.030 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:11.040 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:11.049 >>> print(''.join(results), end="")
2025-07-01 05:51:11.056 - abcDefghiJkl
2025-07-01 05:51:11.070 + abcdefGhijkl
2025-07-01 05:51:11.089 """
2025-07-01 05:51:11.096
2025-07-01 05:51:11.101 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:11.112 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:11.121 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:11.128 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:11.135 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:11.140
2025-07-01 05:51:11.146 # search for the pair that matches best without being identical
2025-07-01 05:51:11.153 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:11.159 # on junk -- unless we have to)
2025-07-01 05:51:11.168 for j in range(blo, bhi):
2025-07-01 05:51:11.179 bj = b[j]
2025-07-01 05:51:11.186 cruncher.set_seq2(bj)
2025-07-01 05:51:11.191 for i in range(alo, ahi):
2025-07-01 05:51:11.196 ai = a[i]
2025-07-01 05:51:11.200 if ai == bj:
2025-07-01 05:51:11.205 if eqi is None:
2025-07-01 05:51:11.211 eqi, eqj = i, j
2025-07-01 05:51:11.215 continue
2025-07-01 05:51:11.220 cruncher.set_seq1(ai)
2025-07-01 05:51:11.224 # computing similarity is expensive, so use the quick
2025-07-01 05:51:11.229 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:11.233 # compares by a factor of 3.
2025-07-01 05:51:11.238 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:11.242 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:11.247 # of the computation is cached by cruncher
2025-07-01 05:51:11.251 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:11.256 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:11.260 cruncher.ratio() > best_ratio:
2025-07-01 05:51:11.265 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:11.269 if best_ratio < cutoff:
2025-07-01 05:51:11.276 # no non-identical "pretty close" pair
2025-07-01 05:51:11.282 if eqi is None:
2025-07-01 05:51:11.292 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:11.304 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:11.316 return
2025-07-01 05:51:11.325 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:11.337 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:11.348 else:
2025-07-01 05:51:11.359 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:11.369 eqi = None
2025-07-01 05:51:11.377
2025-07-01 05:51:11.384 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:11.390 # identical
2025-07-01 05:51:11.401
2025-07-01 05:51:11.411 # pump out diffs from before the synch point
2025-07-01 05:51:11.420 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:11.428
2025-07-01 05:51:11.435 # do intraline marking on the synch pair
2025-07-01 05:51:11.443 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:11.454 if eqi is None:
2025-07-01 05:51:11.462 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:11.470 atags = btags = ""
2025-07-01 05:51:11.479 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:11.487 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:11.494 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:11.500 if tag == 'replace':
2025-07-01 05:51:11.512 atags += '^' * la
2025-07-01 05:51:11.521 btags += '^' * lb
2025-07-01 05:51:11.528 elif tag == 'delete':
2025-07-01 05:51:11.533 atags += '-' * la
2025-07-01 05:51:11.537 elif tag == 'insert':
2025-07-01 05:51:11.547 btags += '+' * lb
2025-07-01 05:51:11.555 elif tag == 'equal':
2025-07-01 05:51:11.562 atags += ' ' * la
2025-07-01 05:51:11.568 btags += ' ' * lb
2025-07-01 05:51:11.578 else:
2025-07-01 05:51:11.591 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:11.603 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:11.612 else:
2025-07-01 05:51:11.619 # the synch pair is identical
2025-07-01 05:51:11.626 yield ' ' + aelt
2025-07-01 05:51:11.633
2025-07-01 05:51:11.639 # pump out diffs from after the synch point
2025-07-01 05:51:11.647 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:11.654
2025-07-01 05:51:11.660 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:11.666 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:11.672
2025-07-01 05:51:11.676 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:11.682 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:11.687 alo = 490, ahi = 1101
2025-07-01 05:51:11.693 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:11.698 blo = 490, bhi = 1101
2025-07-01 05:51:11.705
2025-07-01 05:51:11.713 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:11.720 g = []
2025-07-01 05:51:11.727 if alo < ahi:
2025-07-01 05:51:11.738 if blo < bhi:
2025-07-01 05:51:11.746 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:11.759 else:
2025-07-01 05:51:11.771 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:11.782 elif blo < bhi:
2025-07-01 05:51:11.793 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:11.803
2025-07-01 05:51:11.814 > yield from g
2025-07-01 05:51:11.823
2025-07-01 05:51:11.834 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:11.844 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:11.853
2025-07-01 05:51:11.866 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:11.877 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:11.883 alo = 490, ahi = 1101
2025-07-01 05:51:11.891 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:11.898 blo = 490, bhi = 1101
2025-07-01 05:51:11.904
2025-07-01 05:51:11.910 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:11.915 r"""
2025-07-01 05:51:11.922 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:11.931 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:11.939 synch point, and intraline difference marking is done on the
2025-07-01 05:51:11.947 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:11.956
2025-07-01 05:51:11.967 Example:
2025-07-01 05:51:11.975
2025-07-01 05:51:11.984 >>> d = Differ()
2025-07-01 05:51:11.994 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:12.003 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:12.013 >>> print(''.join(results), end="")
2025-07-01 05:51:12.020 - abcDefghiJkl
2025-07-01 05:51:12.037 + abcdefGhijkl
2025-07-01 05:51:12.052 """
2025-07-01 05:51:12.057
2025-07-01 05:51:12.062 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:12.071 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:12.080 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:12.088 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:12.098 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:12.109
2025-07-01 05:51:12.121 # search for the pair that matches best without being identical
2025-07-01 05:51:12.132 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:12.144 # on junk -- unless we have to)
2025-07-01 05:51:12.161 for j in range(blo, bhi):
2025-07-01 05:51:12.171 bj = b[j]
2025-07-01 05:51:12.179 cruncher.set_seq2(bj)
2025-07-01 05:51:12.186 for i in range(alo, ahi):
2025-07-01 05:51:12.194 ai = a[i]
2025-07-01 05:51:12.207 if ai == bj:
2025-07-01 05:51:12.218 if eqi is None:
2025-07-01 05:51:12.227 eqi, eqj = i, j
2025-07-01 05:51:12.235 continue
2025-07-01 05:51:12.243 cruncher.set_seq1(ai)
2025-07-01 05:51:12.253 # computing similarity is expensive, so use the quick
2025-07-01 05:51:12.261 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:12.269 # compares by a factor of 3.
2025-07-01 05:51:12.275 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:12.282 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:12.288 # of the computation is cached by cruncher
2025-07-01 05:51:12.294 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:12.299 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:12.306 cruncher.ratio() > best_ratio:
2025-07-01 05:51:12.312 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:12.318 if best_ratio < cutoff:
2025-07-01 05:51:12.324 # no non-identical "pretty close" pair
2025-07-01 05:51:12.331 if eqi is None:
2025-07-01 05:51:12.341 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:12.351 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:12.362 return
2025-07-01 05:51:12.373 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:12.384 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:12.392 else:
2025-07-01 05:51:12.400 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:12.406 eqi = None
2025-07-01 05:51:12.411
2025-07-01 05:51:12.419 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:12.430 # identical
2025-07-01 05:51:12.438
2025-07-01 05:51:12.444 # pump out diffs from before the synch point
2025-07-01 05:51:12.450 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:12.456
2025-07-01 05:51:12.463 # do intraline marking on the synch pair
2025-07-01 05:51:12.470 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:12.476 if eqi is None:
2025-07-01 05:51:12.484 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:12.494 atags = btags = ""
2025-07-01 05:51:12.505 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:12.513 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:12.519 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:12.529 if tag == 'replace':
2025-07-01 05:51:12.536 atags += '^' * la
2025-07-01 05:51:12.542 btags += '^' * lb
2025-07-01 05:51:12.547 elif tag == 'delete':
2025-07-01 05:51:12.553 atags += '-' * la
2025-07-01 05:51:12.560 elif tag == 'insert':
2025-07-01 05:51:12.567 btags += '+' * lb
2025-07-01 05:51:12.575 elif tag == 'equal':
2025-07-01 05:51:12.585 atags += ' ' * la
2025-07-01 05:51:12.595 btags += ' ' * lb
2025-07-01 05:51:12.606 else:
2025-07-01 05:51:12.615 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:12.624 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:12.630 else:
2025-07-01 05:51:12.636 # the synch pair is identical
2025-07-01 05:51:12.647 yield ' ' + aelt
2025-07-01 05:51:12.657
2025-07-01 05:51:12.665 # pump out diffs from after the synch point
2025-07-01 05:51:12.670 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:12.675
2025-07-01 05:51:12.681 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:12.686 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:12.691
2025-07-01 05:51:12.703 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:12.715 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:12.722 alo = 491, ahi = 1101
2025-07-01 05:51:12.730 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:12.739 blo = 491, bhi = 1101
2025-07-01 05:51:12.749
2025-07-01 05:51:12.757 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:12.766 g = []
2025-07-01 05:51:12.775 if alo < ahi:
2025-07-01 05:51:12.783 if blo < bhi:
2025-07-01 05:51:12.792 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:12.803 else:
2025-07-01 05:51:12.812 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:12.819 elif blo < bhi:
2025-07-01 05:51:12.825 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:12.832
2025-07-01 05:51:12.838 > yield from g
2025-07-01 05:51:12.844
2025-07-01 05:51:12.850 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:12.856 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:12.862
2025-07-01 05:51:12.868 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:12.874 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:12.880 alo = 491, ahi = 1101
2025-07-01 05:51:12.887 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:12.893 blo = 491, bhi = 1101
2025-07-01 05:51:12.898
2025-07-01 05:51:12.904 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:12.910 r"""
2025-07-01 05:51:12.916 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:12.923 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:12.929 synch point, and intraline difference marking is done on the
2025-07-01 05:51:12.935 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:12.941
2025-07-01 05:51:12.947 Example:
2025-07-01 05:51:12.953
2025-07-01 05:51:12.959 >>> d = Differ()
2025-07-01 05:51:12.966 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:12.979 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:12.986 >>> print(''.join(results), end="")
2025-07-01 05:51:12.994 - abcDefghiJkl
2025-07-01 05:51:13.008 + abcdefGhijkl
2025-07-01 05:51:13.022 """
2025-07-01 05:51:13.034
2025-07-01 05:51:13.045 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:13.058 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:13.068 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:13.077 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:13.084 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:13.091
2025-07-01 05:51:13.098 # search for the pair that matches best without being identical
2025-07-01 05:51:13.106 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:13.118 # on junk -- unless we have to)
2025-07-01 05:51:13.127 for j in range(blo, bhi):
2025-07-01 05:51:13.135 bj = b[j]
2025-07-01 05:51:13.145 cruncher.set_seq2(bj)
2025-07-01 05:51:13.155 for i in range(alo, ahi):
2025-07-01 05:51:13.161 ai = a[i]
2025-07-01 05:51:13.167 if ai == bj:
2025-07-01 05:51:13.174 if eqi is None:
2025-07-01 05:51:13.180 eqi, eqj = i, j
2025-07-01 05:51:13.187 continue
2025-07-01 05:51:13.196 cruncher.set_seq1(ai)
2025-07-01 05:51:13.206 # computing similarity is expensive, so use the quick
2025-07-01 05:51:13.217 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:13.230 # compares by a factor of 3.
2025-07-01 05:51:13.238 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:13.249 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:13.261 # of the computation is cached by cruncher
2025-07-01 05:51:13.270 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:13.279 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:13.291 cruncher.ratio() > best_ratio:
2025-07-01 05:51:13.301 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:13.311 if best_ratio < cutoff:
2025-07-01 05:51:13.322 # no non-identical "pretty close" pair
2025-07-01 05:51:13.332 if eqi is None:
2025-07-01 05:51:13.344 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:13.353 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:13.360 return
2025-07-01 05:51:13.366 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:13.372 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:13.383 else:
2025-07-01 05:51:13.391 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:13.400 eqi = None
2025-07-01 05:51:13.407
2025-07-01 05:51:13.414 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:13.425 # identical
2025-07-01 05:51:13.436
2025-07-01 05:51:13.447 # pump out diffs from before the synch point
2025-07-01 05:51:13.458 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:13.466
2025-07-01 05:51:13.473 # do intraline marking on the synch pair
2025-07-01 05:51:13.479 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:13.484 if eqi is None:
2025-07-01 05:51:13.489 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:13.499 atags = btags = ""
2025-07-01 05:51:13.508 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:13.514 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:13.520 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:13.526 if tag == 'replace':
2025-07-01 05:51:13.536 atags += '^' * la
2025-07-01 05:51:13.545 btags += '^' * lb
2025-07-01 05:51:13.553 elif tag == 'delete':
2025-07-01 05:51:13.563 atags += '-' * la
2025-07-01 05:51:13.572 elif tag == 'insert':
2025-07-01 05:51:13.578 btags += '+' * lb
2025-07-01 05:51:13.584 elif tag == 'equal':
2025-07-01 05:51:13.590 atags += ' ' * la
2025-07-01 05:51:13.597 btags += ' ' * lb
2025-07-01 05:51:13.607 else:
2025-07-01 05:51:13.616 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:13.625 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:13.630 else:
2025-07-01 05:51:13.636 # the synch pair is identical
2025-07-01 05:51:13.642 yield ' ' + aelt
2025-07-01 05:51:13.648
2025-07-01 05:51:13.654 # pump out diffs from after the synch point
2025-07-01 05:51:13.659 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:13.665
2025-07-01 05:51:13.671 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:13.676 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:13.682
2025-07-01 05:51:13.688 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:13.694 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:13.700 alo = 492, ahi = 1101
2025-07-01 05:51:13.706 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:13.712 blo = 492, bhi = 1101
2025-07-01 05:51:13.719
2025-07-01 05:51:13.727 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:13.732 g = []
2025-07-01 05:51:13.738 if alo < ahi:
2025-07-01 05:51:13.743 if blo < bhi:
2025-07-01 05:51:13.748 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:13.753 else:
2025-07-01 05:51:13.758 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:13.763 elif blo < bhi:
2025-07-01 05:51:13.767 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:13.772
2025-07-01 05:51:13.776 > yield from g
2025-07-01 05:51:13.780
2025-07-01 05:51:13.785 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:13.790 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:13.796
2025-07-01 05:51:13.801 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:13.806 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:13.810 alo = 492, ahi = 1101
2025-07-01 05:51:13.816 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:13.821 blo = 492, bhi = 1101
2025-07-01 05:51:13.827
2025-07-01 05:51:13.832 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:13.838 r"""
2025-07-01 05:51:13.847 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:13.857 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:13.864 synch point, and intraline difference marking is done on the
2025-07-01 05:51:13.870 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:13.876
2025-07-01 05:51:13.882 Example:
2025-07-01 05:51:13.887
2025-07-01 05:51:13.893 >>> d = Differ()
2025-07-01 05:51:13.898 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:13.904 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:13.909 >>> print(''.join(results), end="")
2025-07-01 05:51:13.915 - abcDefghiJkl
2025-07-01 05:51:13.926 + abcdefGhijkl
2025-07-01 05:51:13.938 """
2025-07-01 05:51:13.949
2025-07-01 05:51:13.958 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:13.965 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:13.972 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:13.979 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:13.990 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:13.999
2025-07-01 05:51:14.007 # search for the pair that matches best without being identical
2025-07-01 05:51:14.015 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:14.021 # on junk -- unless we have to)
2025-07-01 05:51:14.028 for j in range(blo, bhi):
2025-07-01 05:51:14.035 bj = b[j]
2025-07-01 05:51:14.043 cruncher.set_seq2(bj)
2025-07-01 05:51:14.055 for i in range(alo, ahi):
2025-07-01 05:51:14.065 ai = a[i]
2025-07-01 05:51:14.073 if ai == bj:
2025-07-01 05:51:14.079 if eqi is None:
2025-07-01 05:51:14.085 eqi, eqj = i, j
2025-07-01 05:51:14.090 continue
2025-07-01 05:51:14.095 cruncher.set_seq1(ai)
2025-07-01 05:51:14.100 # computing similarity is expensive, so use the quick
2025-07-01 05:51:14.105 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:14.110 # compares by a factor of 3.
2025-07-01 05:51:14.115 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:14.119 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:14.124 # of the computation is cached by cruncher
2025-07-01 05:51:14.129 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:14.134 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:14.139 cruncher.ratio() > best_ratio:
2025-07-01 05:51:14.145 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:14.150 if best_ratio < cutoff:
2025-07-01 05:51:14.156 # no non-identical "pretty close" pair
2025-07-01 05:51:14.163 if eqi is None:
2025-07-01 05:51:14.170 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:14.179 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:14.190 return
2025-07-01 05:51:14.198 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:14.205 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:14.212 else:
2025-07-01 05:51:14.219 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:14.225 eqi = None
2025-07-01 05:51:14.231
2025-07-01 05:51:14.243 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:14.254 # identical
2025-07-01 05:51:14.265
2025-07-01 05:51:14.274 # pump out diffs from before the synch point
2025-07-01 05:51:14.286 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:14.297
2025-07-01 05:51:14.309 # do intraline marking on the synch pair
2025-07-01 05:51:14.321 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:14.332 if eqi is None:
2025-07-01 05:51:14.340 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:14.347 atags = btags = ""
2025-07-01 05:51:14.360 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:14.367 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:14.374 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:14.381 if tag == 'replace':
2025-07-01 05:51:14.390 atags += '^' * la
2025-07-01 05:51:14.402 btags += '^' * lb
2025-07-01 05:51:14.411 elif tag == 'delete':
2025-07-01 05:51:14.419 atags += '-' * la
2025-07-01 05:51:14.427 elif tag == 'insert':
2025-07-01 05:51:14.438 btags += '+' * lb
2025-07-01 05:51:14.447 elif tag == 'equal':
2025-07-01 05:51:14.454 atags += ' ' * la
2025-07-01 05:51:14.460 btags += ' ' * lb
2025-07-01 05:51:14.472 else:
2025-07-01 05:51:14.484 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:14.495 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:14.503 else:
2025-07-01 05:51:14.509 # the synch pair is identical
2025-07-01 05:51:14.516 yield ' ' + aelt
2025-07-01 05:51:14.528
2025-07-01 05:51:14.540 # pump out diffs from after the synch point
2025-07-01 05:51:14.552 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:14.563
2025-07-01 05:51:14.572 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:14.580 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:14.593
2025-07-01 05:51:14.603 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:14.614 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:14.623 alo = 493, ahi = 1101
2025-07-01 05:51:14.631 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:14.638 blo = 493, bhi = 1101
2025-07-01 05:51:14.645
2025-07-01 05:51:14.655 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:14.665 g = []
2025-07-01 05:51:14.672 if alo < ahi:
2025-07-01 05:51:14.678 if blo < bhi:
2025-07-01 05:51:14.685 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:14.691 else:
2025-07-01 05:51:14.698 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:14.705 elif blo < bhi:
2025-07-01 05:51:14.711 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:14.719
2025-07-01 05:51:14.730 > yield from g
2025-07-01 05:51:14.738
2025-07-01 05:51:14.745 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:14.751 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:14.756
2025-07-01 05:51:14.761 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:14.766 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:14.771 alo = 493, ahi = 1101
2025-07-01 05:51:14.778 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:14.784 blo = 493, bhi = 1101
2025-07-01 05:51:14.789
2025-07-01 05:51:14.795 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:14.803 r"""
2025-07-01 05:51:14.812 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:14.820 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:14.827 synch point, and intraline difference marking is done on the
2025-07-01 05:51:14.836 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:14.845
2025-07-01 05:51:14.856 Example:
2025-07-01 05:51:14.865
2025-07-01 05:51:14.876 >>> d = Differ()
2025-07-01 05:51:14.886 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:14.897 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:14.906 >>> print(''.join(results), end="")
2025-07-01 05:51:14.914 - abcDefghiJkl
2025-07-01 05:51:14.936 + abcdefGhijkl
2025-07-01 05:51:14.950 """
2025-07-01 05:51:14.956
2025-07-01 05:51:14.971 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:14.981 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:14.988 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:14.995 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:15.004 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:15.015
2025-07-01 05:51:15.024 # search for the pair that matches best without being identical
2025-07-01 05:51:15.032 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:15.039 # on junk -- unless we have to)
2025-07-01 05:51:15.045 for j in range(blo, bhi):
2025-07-01 05:51:15.056 bj = b[j]
2025-07-01 05:51:15.066 cruncher.set_seq2(bj)
2025-07-01 05:51:15.075 for i in range(alo, ahi):
2025-07-01 05:51:15.084 ai = a[i]
2025-07-01 05:51:15.091 if ai == bj:
2025-07-01 05:51:15.097 if eqi is None:
2025-07-01 05:51:15.108 eqi, eqj = i, j
2025-07-01 05:51:15.118 continue
2025-07-01 05:51:15.125 cruncher.set_seq1(ai)
2025-07-01 05:51:15.131 # computing similarity is expensive, so use the quick
2025-07-01 05:51:15.136 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:15.140 # compares by a factor of 3.
2025-07-01 05:51:15.147 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:15.159 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:15.169 # of the computation is cached by cruncher
2025-07-01 05:51:15.179 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:15.190 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:15.199 cruncher.ratio() > best_ratio:
2025-07-01 05:51:15.207 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:15.218 if best_ratio < cutoff:
2025-07-01 05:51:15.226 # no non-identical "pretty close" pair
2025-07-01 05:51:15.234 if eqi is None:
2025-07-01 05:51:15.244 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:15.255 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:15.266 return
2025-07-01 05:51:15.276 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:15.284 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:15.291 else:
2025-07-01 05:51:15.298 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:15.304 eqi = None
2025-07-01 05:51:15.311
2025-07-01 05:51:15.321 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:15.329 # identical
2025-07-01 05:51:15.336
2025-07-01 05:51:15.343 # pump out diffs from before the synch point
2025-07-01 05:51:15.352 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:15.360
2025-07-01 05:51:15.367 # do intraline marking on the synch pair
2025-07-01 05:51:15.374 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:15.385 if eqi is None:
2025-07-01 05:51:15.394 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:15.401 atags = btags = ""
2025-07-01 05:51:15.407 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:15.415 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:15.425 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:15.436 if tag == 'replace':
2025-07-01 05:51:15.447 atags += '^' * la
2025-07-01 05:51:15.455 btags += '^' * lb
2025-07-01 05:51:15.462 elif tag == 'delete':
2025-07-01 05:51:15.469 atags += '-' * la
2025-07-01 05:51:15.476 elif tag == 'insert':
2025-07-01 05:51:15.482 btags += '+' * lb
2025-07-01 05:51:15.492 elif tag == 'equal':
2025-07-01 05:51:15.501 atags += ' ' * la
2025-07-01 05:51:15.509 btags += ' ' * lb
2025-07-01 05:51:15.516 else:
2025-07-01 05:51:15.522 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:15.528 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:15.540 else:
2025-07-01 05:51:15.548 # the synch pair is identical
2025-07-01 05:51:15.554 yield ' ' + aelt
2025-07-01 05:51:15.562
2025-07-01 05:51:15.571 # pump out diffs from after the synch point
2025-07-01 05:51:15.579 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:15.585
2025-07-01 05:51:15.591 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:15.600 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:15.607
2025-07-01 05:51:15.613 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:15.623 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:15.633 alo = 494, ahi = 1101
2025-07-01 05:51:15.641 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:15.648 blo = 494, bhi = 1101
2025-07-01 05:51:15.654
2025-07-01 05:51:15.660 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:15.665 g = []
2025-07-01 05:51:15.671 if alo < ahi:
2025-07-01 05:51:15.679 if blo < bhi:
2025-07-01 05:51:15.687 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:15.694 else:
2025-07-01 05:51:15.708 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:15.721 elif blo < bhi:
2025-07-01 05:51:15.731 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:15.738
2025-07-01 05:51:15.745 > yield from g
2025-07-01 05:51:15.751
2025-07-01 05:51:15.759 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:15.770 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:15.779
2025-07-01 05:51:15.789 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:15.801 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:15.813 alo = 494, ahi = 1101
2025-07-01 05:51:15.824 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:15.833 blo = 494, bhi = 1101
2025-07-01 05:51:15.842
2025-07-01 05:51:15.851 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:15.860 r"""
2025-07-01 05:51:15.874 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:15.882 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:15.891 synch point, and intraline difference marking is done on the
2025-07-01 05:51:15.903 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:15.914
2025-07-01 05:51:15.922 Example:
2025-07-01 05:51:15.931
2025-07-01 05:51:15.939 >>> d = Differ()
2025-07-01 05:51:15.944 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:15.951 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:15.958 >>> print(''.join(results), end="")
2025-07-01 05:51:15.965 - abcDefghiJkl
2025-07-01 05:51:15.978 + abcdefGhijkl
2025-07-01 05:51:15.999 """
2025-07-01 05:51:16.007
2025-07-01 05:51:16.013 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:16.018 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:16.025 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:16.031 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:16.038 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:16.046
2025-07-01 05:51:16.058 # search for the pair that matches best without being identical
2025-07-01 05:51:16.067 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:16.075 # on junk -- unless we have to)
2025-07-01 05:51:16.081 for j in range(blo, bhi):
2025-07-01 05:51:16.088 bj = b[j]
2025-07-01 05:51:16.094 cruncher.set_seq2(bj)
2025-07-01 05:51:16.100 for i in range(alo, ahi):
2025-07-01 05:51:16.106 ai = a[i]
2025-07-01 05:51:16.116 if ai == bj:
2025-07-01 05:51:16.123 if eqi is None:
2025-07-01 05:51:16.130 eqi, eqj = i, j
2025-07-01 05:51:16.137 continue
2025-07-01 05:51:16.144 cruncher.set_seq1(ai)
2025-07-01 05:51:16.151 # computing similarity is expensive, so use the quick
2025-07-01 05:51:16.158 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:16.168 # compares by a factor of 3.
2025-07-01 05:51:16.174 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:16.180 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:16.186 # of the computation is cached by cruncher
2025-07-01 05:51:16.192 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:16.198 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:16.204 cruncher.ratio() > best_ratio:
2025-07-01 05:51:16.210 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:16.221 if best_ratio < cutoff:
2025-07-01 05:51:16.231 # no non-identical "pretty close" pair
2025-07-01 05:51:16.238 if eqi is None:
2025-07-01 05:51:16.244 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:16.251 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:16.257 return
2025-07-01 05:51:16.263 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:16.272 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:16.283 else:
2025-07-01 05:51:16.295 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:16.304 eqi = None
2025-07-01 05:51:16.310
2025-07-01 05:51:16.316 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:16.321 # identical
2025-07-01 05:51:16.325
2025-07-01 05:51:16.331 # pump out diffs from before the synch point
2025-07-01 05:51:16.337 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:16.342
2025-07-01 05:51:16.348 # do intraline marking on the synch pair
2025-07-01 05:51:16.354 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:16.360 if eqi is None:
2025-07-01 05:51:16.369 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:16.382 atags = btags = ""
2025-07-01 05:51:16.393 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:16.406 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:16.416 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:16.424 if tag == 'replace':
2025-07-01 05:51:16.432 atags += '^' * la
2025-07-01 05:51:16.439 btags += '^' * lb
2025-07-01 05:51:16.446 elif tag == 'delete':
2025-07-01 05:51:16.457 atags += '-' * la
2025-07-01 05:51:16.466 elif tag == 'insert':
2025-07-01 05:51:16.477 btags += '+' * lb
2025-07-01 05:51:16.494 elif tag == 'equal':
2025-07-01 05:51:16.503 atags += ' ' * la
2025-07-01 05:51:16.512 btags += ' ' * lb
2025-07-01 05:51:16.518 else:
2025-07-01 05:51:16.523 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:16.528 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:16.533 else:
2025-07-01 05:51:16.540 # the synch pair is identical
2025-07-01 05:51:16.548 yield ' ' + aelt
2025-07-01 05:51:16.554
2025-07-01 05:51:16.560 # pump out diffs from after the synch point
2025-07-01 05:51:16.567 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:16.574
2025-07-01 05:51:16.584 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:16.593 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:16.601
2025-07-01 05:51:16.607 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:16.614 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:16.619 alo = 495, ahi = 1101
2025-07-01 05:51:16.627 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:16.631 blo = 495, bhi = 1101
2025-07-01 05:51:16.637
2025-07-01 05:51:16.643 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:16.649 g = []
2025-07-01 05:51:16.654 if alo < ahi:
2025-07-01 05:51:16.663 if blo < bhi:
2025-07-01 05:51:16.672 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:16.680 else:
2025-07-01 05:51:16.687 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:16.694 elif blo < bhi:
2025-07-01 05:51:16.701 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:16.712
2025-07-01 05:51:16.724 > yield from g
2025-07-01 05:51:16.733
2025-07-01 05:51:16.740 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:16.747 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:16.755
2025-07-01 05:51:16.767 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:16.777 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:16.785 alo = 495, ahi = 1101
2025-07-01 05:51:16.792 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:16.796 blo = 495, bhi = 1101
2025-07-01 05:51:16.801
2025-07-01 05:51:16.806 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:16.816 r"""
2025-07-01 05:51:16.825 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:16.833 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:16.842 synch point, and intraline difference marking is done on the
2025-07-01 05:51:16.854 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:16.863
2025-07-01 05:51:16.874 Example:
2025-07-01 05:51:16.886
2025-07-01 05:51:16.897 >>> d = Differ()
2025-07-01 05:51:16.909 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:16.919 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:16.927 >>> print(''.join(results), end="")
2025-07-01 05:51:16.933 - abcDefghiJkl
2025-07-01 05:51:16.945 + abcdefGhijkl
2025-07-01 05:51:16.964 """
2025-07-01 05:51:16.971
2025-07-01 05:51:16.978 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:16.990 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:16.999 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:17.008 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:17.015 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:17.021
2025-07-01 05:51:17.027 # search for the pair that matches best without being identical
2025-07-01 05:51:17.033 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:17.038 # on junk -- unless we have to)
2025-07-01 05:51:17.046 for j in range(blo, bhi):
2025-07-01 05:51:17.058 bj = b[j]
2025-07-01 05:51:17.069 cruncher.set_seq2(bj)
2025-07-01 05:51:17.078 for i in range(alo, ahi):
2025-07-01 05:51:17.089 ai = a[i]
2025-07-01 05:51:17.100 if ai == bj:
2025-07-01 05:51:17.109 if eqi is None:
2025-07-01 05:51:17.122 eqi, eqj = i, j
2025-07-01 05:51:17.132 continue
2025-07-01 05:51:17.142 cruncher.set_seq1(ai)
2025-07-01 05:51:17.155 # computing similarity is expensive, so use the quick
2025-07-01 05:51:17.164 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:17.170 # compares by a factor of 3.
2025-07-01 05:51:17.175 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:17.186 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:17.194 # of the computation is cached by cruncher
2025-07-01 05:51:17.200 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:17.206 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:17.212 cruncher.ratio() > best_ratio:
2025-07-01 05:51:17.217 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:17.221 if best_ratio < cutoff:
2025-07-01 05:51:17.226 # no non-identical "pretty close" pair
2025-07-01 05:51:17.233 if eqi is None:
2025-07-01 05:51:17.245 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:17.256 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:17.268 return
2025-07-01 05:51:17.277 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:17.284 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:17.296 else:
2025-07-01 05:51:17.308 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:17.318 eqi = None
2025-07-01 05:51:17.327
2025-07-01 05:51:17.339 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:17.348 # identical
2025-07-01 05:51:17.355
2025-07-01 05:51:17.362 # pump out diffs from before the synch point
2025-07-01 05:51:17.369 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:17.376
2025-07-01 05:51:17.384 # do intraline marking on the synch pair
2025-07-01 05:51:17.391 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:17.399 if eqi is None:
2025-07-01 05:51:17.410 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:17.423 atags = btags = ""
2025-07-01 05:51:17.433 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:17.444 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:17.455 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:17.463 if tag == 'replace':
2025-07-01 05:51:17.471 atags += '^' * la
2025-07-01 05:51:17.482 btags += '^' * lb
2025-07-01 05:51:17.490 elif tag == 'delete':
2025-07-01 05:51:17.500 atags += '-' * la
2025-07-01 05:51:17.506 elif tag == 'insert':
2025-07-01 05:51:17.513 btags += '+' * lb
2025-07-01 05:51:17.519 elif tag == 'equal':
2025-07-01 05:51:17.530 atags += ' ' * la
2025-07-01 05:51:17.538 btags += ' ' * lb
2025-07-01 05:51:17.545 else:
2025-07-01 05:51:17.552 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:17.559 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:17.566 else:
2025-07-01 05:51:17.577 # the synch pair is identical
2025-07-01 05:51:17.583 yield ' ' + aelt
2025-07-01 05:51:17.589
2025-07-01 05:51:17.598 # pump out diffs from after the synch point
2025-07-01 05:51:17.610 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:17.621
2025-07-01 05:51:17.631 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:17.643 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:17.652
2025-07-01 05:51:17.665 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:17.678 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:17.688 alo = 496, ahi = 1101
2025-07-01 05:51:17.697 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:17.704 blo = 496, bhi = 1101
2025-07-01 05:51:17.710
2025-07-01 05:51:17.715 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:17.720 g = []
2025-07-01 05:51:17.726 if alo < ahi:
2025-07-01 05:51:17.732 if blo < bhi:
2025-07-01 05:51:17.738 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:17.743 else:
2025-07-01 05:51:17.752 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:17.764 elif blo < bhi:
2025-07-01 05:51:17.775 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:17.784
2025-07-01 05:51:17.792 > yield from g
2025-07-01 05:51:17.799
2025-07-01 05:51:17.811 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:17.821 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:17.829
2025-07-01 05:51:17.836 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:17.841 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:17.846 alo = 496, ahi = 1101
2025-07-01 05:51:17.853 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:17.859 blo = 496, bhi = 1101
2025-07-01 05:51:17.865
2025-07-01 05:51:17.877 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:17.888 r"""
2025-07-01 05:51:17.897 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:17.906 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:17.913 synch point, and intraline difference marking is done on the
2025-07-01 05:51:17.921 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:17.928
2025-07-01 05:51:17.934 Example:
2025-07-01 05:51:17.945
2025-07-01 05:51:17.953 >>> d = Differ()
2025-07-01 05:51:17.960 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:17.966 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:17.971 >>> print(''.join(results), end="")
2025-07-01 05:51:17.975 - abcDefghiJkl
2025-07-01 05:51:17.984 + abcdefGhijkl
2025-07-01 05:51:17.993 """
2025-07-01 05:51:17.999
2025-07-01 05:51:18.008 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:18.016 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:18.022 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:18.030 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:18.039 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:18.047
2025-07-01 05:51:18.054 # search for the pair that matches best without being identical
2025-07-01 05:51:18.064 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:18.073 # on junk -- unless we have to)
2025-07-01 05:51:18.083 for j in range(blo, bhi):
2025-07-01 05:51:18.091 bj = b[j]
2025-07-01 05:51:18.099 cruncher.set_seq2(bj)
2025-07-01 05:51:18.107 for i in range(alo, ahi):
2025-07-01 05:51:18.118 ai = a[i]
2025-07-01 05:51:18.126 if ai == bj:
2025-07-01 05:51:18.133 if eqi is None:
2025-07-01 05:51:18.144 eqi, eqj = i, j
2025-07-01 05:51:18.153 continue
2025-07-01 05:51:18.162 cruncher.set_seq1(ai)
2025-07-01 05:51:18.171 # computing similarity is expensive, so use the quick
2025-07-01 05:51:18.179 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:18.186 # compares by a factor of 3.
2025-07-01 05:51:18.193 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:18.199 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:18.215 # of the computation is cached by cruncher
2025-07-01 05:51:18.228 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:18.240 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:18.250 cruncher.ratio() > best_ratio:
2025-07-01 05:51:18.259 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:18.267 if best_ratio < cutoff:
2025-07-01 05:51:18.276 # no non-identical "pretty close" pair
2025-07-01 05:51:18.283 if eqi is None:
2025-07-01 05:51:18.291 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:18.299 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:18.310 return
2025-07-01 05:51:18.319 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:18.327 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:18.334 else:
2025-07-01 05:51:18.344 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:18.357 eqi = None
2025-07-01 05:51:18.367
2025-07-01 05:51:18.374 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:18.387 # identical
2025-07-01 05:51:18.398
2025-07-01 05:51:18.405 # pump out diffs from before the synch point
2025-07-01 05:51:18.413 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:18.420
2025-07-01 05:51:18.426 # do intraline marking on the synch pair
2025-07-01 05:51:18.437 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:18.449 if eqi is None:
2025-07-01 05:51:18.462 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:18.471 atags = btags = ""
2025-07-01 05:51:18.479 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:18.486 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:18.492 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:18.498 if tag == 'replace':
2025-07-01 05:51:18.503 atags += '^' * la
2025-07-01 05:51:18.511 btags += '^' * lb
2025-07-01 05:51:18.522 elif tag == 'delete':
2025-07-01 05:51:18.531 atags += '-' * la
2025-07-01 05:51:18.539 elif tag == 'insert':
2025-07-01 05:51:18.550 btags += '+' * lb
2025-07-01 05:51:18.561 elif tag == 'equal':
2025-07-01 05:51:18.568 atags += ' ' * la
2025-07-01 05:51:18.575 btags += ' ' * lb
2025-07-01 05:51:18.583 else:
2025-07-01 05:51:18.594 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:18.603 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:18.610 else:
2025-07-01 05:51:18.620 # the synch pair is identical
2025-07-01 05:51:18.631 yield ' ' + aelt
2025-07-01 05:51:18.643
2025-07-01 05:51:18.655 # pump out diffs from after the synch point
2025-07-01 05:51:18.664 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:18.669
2025-07-01 05:51:18.675 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:18.683 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:18.695
2025-07-01 05:51:18.708 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:18.720 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:18.727 alo = 497, ahi = 1101
2025-07-01 05:51:18.736 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:18.741 blo = 497, bhi = 1101
2025-07-01 05:51:18.745
2025-07-01 05:51:18.750 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:18.754 g = []
2025-07-01 05:51:18.759 if alo < ahi:
2025-07-01 05:51:18.766 if blo < bhi:
2025-07-01 05:51:18.773 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:18.778 else:
2025-07-01 05:51:18.782 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:18.787 elif blo < bhi:
2025-07-01 05:51:18.795 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:18.804
2025-07-01 05:51:18.811 > yield from g
2025-07-01 05:51:18.819
2025-07-01 05:51:18.826 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:18.831 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:18.835
2025-07-01 05:51:18.840 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:18.845 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:18.849 alo = 497, ahi = 1101
2025-07-01 05:51:18.854 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:18.858 blo = 497, bhi = 1101
2025-07-01 05:51:18.862
2025-07-01 05:51:18.867 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:18.871 r"""
2025-07-01 05:51:18.876 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:18.880 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:18.884 synch point, and intraline difference marking is done on the
2025-07-01 05:51:18.889 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:18.893
2025-07-01 05:51:18.897 Example:
2025-07-01 05:51:18.901
2025-07-01 05:51:18.906 >>> d = Differ()
2025-07-01 05:51:18.910 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:18.915 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:18.919 >>> print(''.join(results), end="")
2025-07-01 05:51:18.923 - abcDefghiJkl
2025-07-01 05:51:18.932 + abcdefGhijkl
2025-07-01 05:51:18.941 """
2025-07-01 05:51:18.945
2025-07-01 05:51:18.951 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:18.956 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:18.967 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:18.979 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:18.989 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:18.997
2025-07-01 05:51:19.008 # search for the pair that matches best without being identical
2025-07-01 05:51:19.020 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:19.029 # on junk -- unless we have to)
2025-07-01 05:51:19.043 for j in range(blo, bhi):
2025-07-01 05:51:19.053 bj = b[j]
2025-07-01 05:51:19.065 cruncher.set_seq2(bj)
2025-07-01 05:51:19.077 for i in range(alo, ahi):
2025-07-01 05:51:19.086 ai = a[i]
2025-07-01 05:51:19.098 if ai == bj:
2025-07-01 05:51:19.108 if eqi is None:
2025-07-01 05:51:19.116 eqi, eqj = i, j
2025-07-01 05:51:19.123 continue
2025-07-01 05:51:19.133 cruncher.set_seq1(ai)
2025-07-01 05:51:19.145 # computing similarity is expensive, so use the quick
2025-07-01 05:51:19.157 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:19.167 # compares by a factor of 3.
2025-07-01 05:51:19.176 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:19.184 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:19.191 # of the computation is cached by cruncher
2025-07-01 05:51:19.197 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:19.202 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:19.215 cruncher.ratio() > best_ratio:
2025-07-01 05:51:19.224 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:19.234 if best_ratio < cutoff:
2025-07-01 05:51:19.244 # no non-identical "pretty close" pair
2025-07-01 05:51:19.253 if eqi is None:
2025-07-01 05:51:19.260 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:19.268 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:19.274 return
2025-07-01 05:51:19.286 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:19.296 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:19.309 else:
2025-07-01 05:51:19.316 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:19.323 eqi = None
2025-07-01 05:51:19.334
2025-07-01 05:51:19.343 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:19.351 # identical
2025-07-01 05:51:19.358
2025-07-01 05:51:19.363 # pump out diffs from before the synch point
2025-07-01 05:51:19.372 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:19.379
2025-07-01 05:51:19.386 # do intraline marking on the synch pair
2025-07-01 05:51:19.393 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:19.402 if eqi is None:
2025-07-01 05:51:19.413 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:19.425 atags = btags = ""
2025-07-01 05:51:19.435 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:19.443 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:19.450 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:19.457 if tag == 'replace':
2025-07-01 05:51:19.462 atags += '^' * la
2025-07-01 05:51:19.467 btags += '^' * lb
2025-07-01 05:51:19.471 elif tag == 'delete':
2025-07-01 05:51:19.477 atags += '-' * la
2025-07-01 05:51:19.485 elif tag == 'insert':
2025-07-01 05:51:19.491 btags += '+' * lb
2025-07-01 05:51:19.497 elif tag == 'equal':
2025-07-01 05:51:19.502 atags += ' ' * la
2025-07-01 05:51:19.507 btags += ' ' * lb
2025-07-01 05:51:19.519 else:
2025-07-01 05:51:19.527 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:19.532 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:19.538 else:
2025-07-01 05:51:19.544 # the synch pair is identical
2025-07-01 05:51:19.549 yield ' ' + aelt
2025-07-01 05:51:19.554
2025-07-01 05:51:19.560 # pump out diffs from after the synch point
2025-07-01 05:51:19.566 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:19.576
2025-07-01 05:51:19.585 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:19.593 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:19.599
2025-07-01 05:51:19.605 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:19.612 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:19.618 alo = 498, ahi = 1101
2025-07-01 05:51:19.627 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:19.638 blo = 498, bhi = 1101
2025-07-01 05:51:19.644
2025-07-01 05:51:19.650 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:19.658 g = []
2025-07-01 05:51:19.665 if alo < ahi:
2025-07-01 05:51:19.670 if blo < bhi:
2025-07-01 05:51:19.677 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:19.683 else:
2025-07-01 05:51:19.689 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:19.696 elif blo < bhi:
2025-07-01 05:51:19.703 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:19.711
2025-07-01 05:51:19.723 > yield from g
2025-07-01 05:51:19.733
2025-07-01 05:51:19.744 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:19.755 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:19.763
2025-07-01 05:51:19.771 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:19.779 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:19.784 alo = 498, ahi = 1101
2025-07-01 05:51:19.791 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:19.799 blo = 498, bhi = 1101
2025-07-01 05:51:19.810
2025-07-01 05:51:19.821 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:19.829 r"""
2025-07-01 05:51:19.835 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:19.843 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:19.851 synch point, and intraline difference marking is done on the
2025-07-01 05:51:19.863 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:19.872
2025-07-01 05:51:19.883 Example:
2025-07-01 05:51:19.891
2025-07-01 05:51:19.899 >>> d = Differ()
2025-07-01 05:51:19.907 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:19.915 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:19.926 >>> print(''.join(results), end="")
2025-07-01 05:51:19.935 - abcDefghiJkl
2025-07-01 05:51:19.954 + abcdefGhijkl
2025-07-01 05:51:19.971 """
2025-07-01 05:51:19.983
2025-07-01 05:51:19.991 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:19.998 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:20.007 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:20.017 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:20.028 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:20.041
2025-07-01 05:51:20.053 # search for the pair that matches best without being identical
2025-07-01 05:51:20.064 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:20.077 # on junk -- unless we have to)
2025-07-01 05:51:20.087 for j in range(blo, bhi):
2025-07-01 05:51:20.096 bj = b[j]
2025-07-01 05:51:20.103 cruncher.set_seq2(bj)
2025-07-01 05:51:20.111 for i in range(alo, ahi):
2025-07-01 05:51:20.121 ai = a[i]
2025-07-01 05:51:20.131 if ai == bj:
2025-07-01 05:51:20.139 if eqi is None:
2025-07-01 05:51:20.146 eqi, eqj = i, j
2025-07-01 05:51:20.153 continue
2025-07-01 05:51:20.158 cruncher.set_seq1(ai)
2025-07-01 05:51:20.165 # computing similarity is expensive, so use the quick
2025-07-01 05:51:20.170 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:20.175 # compares by a factor of 3.
2025-07-01 05:51:20.182 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:20.188 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:20.193 # of the computation is cached by cruncher
2025-07-01 05:51:20.200 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:20.207 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:20.212 cruncher.ratio() > best_ratio:
2025-07-01 05:51:20.218 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:20.223 if best_ratio < cutoff:
2025-07-01 05:51:20.236 # no non-identical "pretty close" pair
2025-07-01 05:51:20.245 if eqi is None:
2025-07-01 05:51:20.253 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:20.260 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:20.268 return
2025-07-01 05:51:20.275 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:20.282 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:20.292 else:
2025-07-01 05:51:20.304 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:20.313 eqi = None
2025-07-01 05:51:20.322
2025-07-01 05:51:20.330 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:20.341 # identical
2025-07-01 05:51:20.351
2025-07-01 05:51:20.360 # pump out diffs from before the synch point
2025-07-01 05:51:20.368 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:20.374
2025-07-01 05:51:20.380 # do intraline marking on the synch pair
2025-07-01 05:51:20.386 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:20.399 if eqi is None:
2025-07-01 05:51:20.407 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:20.415 atags = btags = ""
2025-07-01 05:51:20.422 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:20.430 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:20.438 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:20.445 if tag == 'replace':
2025-07-01 05:51:20.452 atags += '^' * la
2025-07-01 05:51:20.464 btags += '^' * lb
2025-07-01 05:51:20.477 elif tag == 'delete':
2025-07-01 05:51:20.487 atags += '-' * la
2025-07-01 05:51:20.496 elif tag == 'insert':
2025-07-01 05:51:20.504 btags += '+' * lb
2025-07-01 05:51:20.511 elif tag == 'equal':
2025-07-01 05:51:20.518 atags += ' ' * la
2025-07-01 05:51:20.530 btags += ' ' * lb
2025-07-01 05:51:20.539 else:
2025-07-01 05:51:20.548 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:20.562 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:20.571 else:
2025-07-01 05:51:20.579 # the synch pair is identical
2025-07-01 05:51:20.586 yield ' ' + aelt
2025-07-01 05:51:20.593
2025-07-01 05:51:20.601 # pump out diffs from after the synch point
2025-07-01 05:51:20.612 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:20.622
2025-07-01 05:51:20.630 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:20.640 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:20.651
2025-07-01 05:51:20.663 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:20.675 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:20.687 alo = 499, ahi = 1101
2025-07-01 05:51:20.697 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:20.708 blo = 499, bhi = 1101
2025-07-01 05:51:20.716
2025-07-01 05:51:20.724 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:20.735 g = []
2025-07-01 05:51:20.747 if alo < ahi:
2025-07-01 05:51:20.755 if blo < bhi:
2025-07-01 05:51:20.762 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:20.767 else:
2025-07-01 05:51:20.773 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:20.781 elif blo < bhi:
2025-07-01 05:51:20.789 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:20.796
2025-07-01 05:51:20.802 > yield from g
2025-07-01 05:51:20.809
2025-07-01 05:51:20.816 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:20.823 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:20.830
2025-07-01 05:51:20.837 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:20.845 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:20.852 alo = 499, ahi = 1101
2025-07-01 05:51:20.860 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:20.867 blo = 499, bhi = 1101
2025-07-01 05:51:20.873
2025-07-01 05:51:20.880 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:20.887 r"""
2025-07-01 05:51:20.898 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:20.908 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:20.916 synch point, and intraline difference marking is done on the
2025-07-01 05:51:20.922 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:20.930
2025-07-01 05:51:20.941 Example:
2025-07-01 05:51:20.951
2025-07-01 05:51:20.958 >>> d = Differ()
2025-07-01 05:51:20.964 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:20.970 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:20.974 >>> print(''.join(results), end="")
2025-07-01 05:51:20.979 - abcDefghiJkl
2025-07-01 05:51:20.991 + abcdefGhijkl
2025-07-01 05:51:21.009 """
2025-07-01 05:51:21.018
2025-07-01 05:51:21.024 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:21.031 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:21.036 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:21.042 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:21.052 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:21.062
2025-07-01 05:51:21.072 # search for the pair that matches best without being identical
2025-07-01 05:51:21.079 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:21.087 # on junk -- unless we have to)
2025-07-01 05:51:21.097 for j in range(blo, bhi):
2025-07-01 05:51:21.108 bj = b[j]
2025-07-01 05:51:21.118 cruncher.set_seq2(bj)
2025-07-01 05:51:21.126 for i in range(alo, ahi):
2025-07-01 05:51:21.133 ai = a[i]
2025-07-01 05:51:21.139 if ai == bj:
2025-07-01 05:51:21.146 if eqi is None:
2025-07-01 05:51:21.152 eqi, eqj = i, j
2025-07-01 05:51:21.160 continue
2025-07-01 05:51:21.166 cruncher.set_seq1(ai)
2025-07-01 05:51:21.172 # computing similarity is expensive, so use the quick
2025-07-01 05:51:21.178 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:21.184 # compares by a factor of 3.
2025-07-01 05:51:21.190 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:21.195 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:21.201 # of the computation is cached by cruncher
2025-07-01 05:51:21.207 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:21.213 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:21.219 cruncher.ratio() > best_ratio:
2025-07-01 05:51:21.225 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:21.230 if best_ratio < cutoff:
2025-07-01 05:51:21.236 # no non-identical "pretty close" pair
2025-07-01 05:51:21.242 if eqi is None:
2025-07-01 05:51:21.248 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:21.254 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:21.259 return
2025-07-01 05:51:21.265 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:21.271 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:21.277 else:
2025-07-01 05:51:21.282 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:21.293 eqi = None
2025-07-01 05:51:21.300
2025-07-01 05:51:21.307 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:21.314 # identical
2025-07-01 05:51:21.325
2025-07-01 05:51:21.335 # pump out diffs from before the synch point
2025-07-01 05:51:21.344 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:21.349
2025-07-01 05:51:21.355 # do intraline marking on the synch pair
2025-07-01 05:51:21.361 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:21.368 if eqi is None:
2025-07-01 05:51:21.374 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:21.388 atags = btags = ""
2025-07-01 05:51:21.399 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:21.408 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:21.416 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:21.424 if tag == 'replace':
2025-07-01 05:51:21.429 atags += '^' * la
2025-07-01 05:51:21.435 btags += '^' * lb
2025-07-01 05:51:21.442 elif tag == 'delete':
2025-07-01 05:51:21.448 atags += '-' * la
2025-07-01 05:51:21.454 elif tag == 'insert':
2025-07-01 05:51:21.461 btags += '+' * lb
2025-07-01 05:51:21.467 elif tag == 'equal':
2025-07-01 05:51:21.474 atags += ' ' * la
2025-07-01 05:51:21.486 btags += ' ' * lb
2025-07-01 05:51:21.493 else:
2025-07-01 05:51:21.499 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:21.506 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:21.516 else:
2025-07-01 05:51:21.526 # the synch pair is identical
2025-07-01 05:51:21.532 yield ' ' + aelt
2025-07-01 05:51:21.539
2025-07-01 05:51:21.546 # pump out diffs from after the synch point
2025-07-01 05:51:21.552 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:21.559
2025-07-01 05:51:21.565 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:21.573 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:21.578
2025-07-01 05:51:21.587 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:21.594 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:21.600 alo = 500, ahi = 1101
2025-07-01 05:51:21.610 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:21.619 blo = 500, bhi = 1101
2025-07-01 05:51:21.627
2025-07-01 05:51:21.634 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:21.645 g = []
2025-07-01 05:51:21.653 if alo < ahi:
2025-07-01 05:51:21.662 if blo < bhi:
2025-07-01 05:51:21.672 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:21.680 else:
2025-07-01 05:51:21.688 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:21.694 elif blo < bhi:
2025-07-01 05:51:21.705 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:21.711
2025-07-01 05:51:21.717 > yield from g
2025-07-01 05:51:21.726
2025-07-01 05:51:21.732 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:21.738 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:21.744
2025-07-01 05:51:21.750 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:21.757 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:21.764 alo = 500, ahi = 1101
2025-07-01 05:51:21.774 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:21.781 blo = 500, bhi = 1101
2025-07-01 05:51:21.788
2025-07-01 05:51:21.799 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:21.809 r"""
2025-07-01 05:51:21.817 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:21.823 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:21.829 synch point, and intraline difference marking is done on the
2025-07-01 05:51:21.835 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:21.840
2025-07-01 05:51:21.846 Example:
2025-07-01 05:51:21.852
2025-07-01 05:51:21.858 >>> d = Differ()
2025-07-01 05:51:21.864 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:21.871 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:21.877 >>> print(''.join(results), end="")
2025-07-01 05:51:21.883 - abcDefghiJkl
2025-07-01 05:51:21.894 + abcdefGhijkl
2025-07-01 05:51:21.906 """
2025-07-01 05:51:21.912
2025-07-01 05:51:21.918 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:21.924 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:21.930 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:21.937 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:21.945 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:21.952
2025-07-01 05:51:21.957 # search for the pair that matches best without being identical
2025-07-01 05:51:21.963 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:21.968 # on junk -- unless we have to)
2025-07-01 05:51:21.974 for j in range(blo, bhi):
2025-07-01 05:51:21.981 bj = b[j]
2025-07-01 05:51:21.987 cruncher.set_seq2(bj)
2025-07-01 05:51:21.993 for i in range(alo, ahi):
2025-07-01 05:51:21.998 ai = a[i]
2025-07-01 05:51:22.003 if ai == bj:
2025-07-01 05:51:22.008 if eqi is None:
2025-07-01 05:51:22.013 eqi, eqj = i, j
2025-07-01 05:51:22.018 continue
2025-07-01 05:51:22.023 cruncher.set_seq1(ai)
2025-07-01 05:51:22.028 # computing similarity is expensive, so use the quick
2025-07-01 05:51:22.033 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:22.038 # compares by a factor of 3.
2025-07-01 05:51:22.043 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:22.049 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:22.054 # of the computation is cached by cruncher
2025-07-01 05:51:22.060 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:22.067 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:22.073 cruncher.ratio() > best_ratio:
2025-07-01 05:51:22.078 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:22.083 if best_ratio < cutoff:
2025-07-01 05:51:22.088 # no non-identical "pretty close" pair
2025-07-01 05:51:22.093 if eqi is None:
2025-07-01 05:51:22.098 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:22.103 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:22.107 return
2025-07-01 05:51:22.112 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:22.118 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:22.130 else:
2025-07-01 05:51:22.139 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:22.147 eqi = None
2025-07-01 05:51:22.153
2025-07-01 05:51:22.158 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:22.165 # identical
2025-07-01 05:51:22.170
2025-07-01 05:51:22.181 # pump out diffs from before the synch point
2025-07-01 05:51:22.192 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:22.201
2025-07-01 05:51:22.209 # do intraline marking on the synch pair
2025-07-01 05:51:22.217 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:22.224 if eqi is None:
2025-07-01 05:51:22.231 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:22.239 atags = btags = ""
2025-07-01 05:51:22.247 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:22.254 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:22.260 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:22.266 if tag == 'replace':
2025-07-01 05:51:22.273 atags += '^' * la
2025-07-01 05:51:22.282 btags += '^' * lb
2025-07-01 05:51:22.290 elif tag == 'delete':
2025-07-01 05:51:22.297 atags += '-' * la
2025-07-01 05:51:22.303 elif tag == 'insert':
2025-07-01 05:51:22.311 btags += '+' * lb
2025-07-01 05:51:22.324 elif tag == 'equal':
2025-07-01 05:51:22.336 atags += ' ' * la
2025-07-01 05:51:22.345 btags += ' ' * lb
2025-07-01 05:51:22.352 else:
2025-07-01 05:51:22.358 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:22.364 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:22.377 else:
2025-07-01 05:51:22.386 # the synch pair is identical
2025-07-01 05:51:22.394 yield ' ' + aelt
2025-07-01 05:51:22.401
2025-07-01 05:51:22.408 # pump out diffs from after the synch point
2025-07-01 05:51:22.414 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:22.419
2025-07-01 05:51:22.424 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:22.430 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:22.436
2025-07-01 05:51:22.441 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:22.447 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:22.453 alo = 501, ahi = 1101
2025-07-01 05:51:22.459 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:22.468 blo = 501, bhi = 1101
2025-07-01 05:51:22.480
2025-07-01 05:51:22.488 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:22.500 g = []
2025-07-01 05:51:22.509 if alo < ahi:
2025-07-01 05:51:22.516 if blo < bhi:
2025-07-01 05:51:22.522 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:22.527 else:
2025-07-01 05:51:22.532 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:22.537 elif blo < bhi:
2025-07-01 05:51:22.542 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:22.546
2025-07-01 05:51:22.551 > yield from g
2025-07-01 05:51:22.556
2025-07-01 05:51:22.561 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:22.567 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:22.573
2025-07-01 05:51:22.579 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:22.587 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:22.597 alo = 501, ahi = 1101
2025-07-01 05:51:22.607 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:22.618 blo = 501, bhi = 1101
2025-07-01 05:51:22.624
2025-07-01 05:51:22.631 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:22.642 r"""
2025-07-01 05:51:22.653 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:22.663 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:22.671 synch point, and intraline difference marking is done on the
2025-07-01 05:51:22.677 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:22.682
2025-07-01 05:51:22.686 Example:
2025-07-01 05:51:22.695
2025-07-01 05:51:22.705 >>> d = Differ()
2025-07-01 05:51:22.713 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:22.720 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:22.727 >>> print(''.join(results), end="")
2025-07-01 05:51:22.738 - abcDefghiJkl
2025-07-01 05:51:22.752 + abcdefGhijkl
2025-07-01 05:51:22.763 """
2025-07-01 05:51:22.768
2025-07-01 05:51:22.774 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:22.781 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:22.789 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:22.796 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:22.809 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:22.817
2025-07-01 05:51:22.823 # search for the pair that matches best without being identical
2025-07-01 05:51:22.829 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:22.835 # on junk -- unless we have to)
2025-07-01 05:51:22.841 for j in range(blo, bhi):
2025-07-01 05:51:22.845 bj = b[j]
2025-07-01 05:51:22.852 cruncher.set_seq2(bj)
2025-07-01 05:51:22.864 for i in range(alo, ahi):
2025-07-01 05:51:22.875 ai = a[i]
2025-07-01 05:51:22.883 if ai == bj:
2025-07-01 05:51:22.894 if eqi is None:
2025-07-01 05:51:22.903 eqi, eqj = i, j
2025-07-01 05:51:22.914 continue
2025-07-01 05:51:22.921 cruncher.set_seq1(ai)
2025-07-01 05:51:22.928 # computing similarity is expensive, so use the quick
2025-07-01 05:51:22.935 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:22.948 # compares by a factor of 3.
2025-07-01 05:51:22.959 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:22.968 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:22.976 # of the computation is cached by cruncher
2025-07-01 05:51:22.983 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:22.991 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:23.003 cruncher.ratio() > best_ratio:
2025-07-01 05:51:23.010 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:23.018 if best_ratio < cutoff:
2025-07-01 05:51:23.026 # no non-identical "pretty close" pair
2025-07-01 05:51:23.032 if eqi is None:
2025-07-01 05:51:23.038 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:23.045 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:23.051 return
2025-07-01 05:51:23.058 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:23.065 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:23.071 else:
2025-07-01 05:51:23.078 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:23.084 eqi = None
2025-07-01 05:51:23.089
2025-07-01 05:51:23.095 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:23.100 # identical
2025-07-01 05:51:23.106
2025-07-01 05:51:23.114 # pump out diffs from before the synch point
2025-07-01 05:51:23.124 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:23.130
2025-07-01 05:51:23.136 # do intraline marking on the synch pair
2025-07-01 05:51:23.142 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:23.149 if eqi is None:
2025-07-01 05:51:23.160 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:23.172 atags = btags = ""
2025-07-01 05:51:23.182 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:23.188 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:23.195 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:23.204 if tag == 'replace':
2025-07-01 05:51:23.216 atags += '^' * la
2025-07-01 05:51:23.226 btags += '^' * lb
2025-07-01 05:51:23.237 elif tag == 'delete':
2025-07-01 05:51:23.247 atags += '-' * la
2025-07-01 05:51:23.256 elif tag == 'insert':
2025-07-01 05:51:23.267 btags += '+' * lb
2025-07-01 05:51:23.276 elif tag == 'equal':
2025-07-01 05:51:23.282 atags += ' ' * la
2025-07-01 05:51:23.288 btags += ' ' * lb
2025-07-01 05:51:23.295 else:
2025-07-01 05:51:23.301 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:23.313 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:23.322 else:
2025-07-01 05:51:23.329 # the synch pair is identical
2025-07-01 05:51:23.335 yield ' ' + aelt
2025-07-01 05:51:23.340
2025-07-01 05:51:23.345 # pump out diffs from after the synch point
2025-07-01 05:51:23.351 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:23.361
2025-07-01 05:51:23.371 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:23.379 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:23.385
2025-07-01 05:51:23.393 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:23.401 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:23.407 alo = 502, ahi = 1101
2025-07-01 05:51:23.415 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:23.423 blo = 502, bhi = 1101
2025-07-01 05:51:23.431
2025-07-01 05:51:23.443 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:23.453 g = []
2025-07-01 05:51:23.463 if alo < ahi:
2025-07-01 05:51:23.475 if blo < bhi:
2025-07-01 05:51:23.487 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:23.496 else:
2025-07-01 05:51:23.505 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:23.513 elif blo < bhi:
2025-07-01 05:51:23.520 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:23.526
2025-07-01 05:51:23.531 > yield from g
2025-07-01 05:51:23.539
2025-07-01 05:51:23.553 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:23.560 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:23.567
2025-07-01 05:51:23.574 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:23.582 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:23.588 alo = 502, ahi = 1101
2025-07-01 05:51:23.595 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:23.601 blo = 502, bhi = 1101
2025-07-01 05:51:23.606
2025-07-01 05:51:23.616 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:23.624 r"""
2025-07-01 05:51:23.630 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:23.641 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:23.652 synch point, and intraline difference marking is done on the
2025-07-01 05:51:23.661 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:23.671
2025-07-01 05:51:23.679 Example:
2025-07-01 05:51:23.686
2025-07-01 05:51:23.693 >>> d = Differ()
2025-07-01 05:51:23.700 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:23.710 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:23.722 >>> print(''.join(results), end="")
2025-07-01 05:51:23.731 - abcDefghiJkl
2025-07-01 05:51:23.754 + abcdefGhijkl
2025-07-01 05:51:23.770 """
2025-07-01 05:51:23.782
2025-07-01 05:51:23.790 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:23.801 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:23.809 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:23.816 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:23.823 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:23.834
2025-07-01 05:51:23.842 # search for the pair that matches best without being identical
2025-07-01 05:51:23.848 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:23.854 # on junk -- unless we have to)
2025-07-01 05:51:23.860 for j in range(blo, bhi):
2025-07-01 05:51:23.867 bj = b[j]
2025-07-01 05:51:23.879 cruncher.set_seq2(bj)
2025-07-01 05:51:23.887 for i in range(alo, ahi):
2025-07-01 05:51:23.894 ai = a[i]
2025-07-01 05:51:23.899 if ai == bj:
2025-07-01 05:51:23.905 if eqi is None:
2025-07-01 05:51:23.911 eqi, eqj = i, j
2025-07-01 05:51:23.923 continue
2025-07-01 05:51:23.933 cruncher.set_seq1(ai)
2025-07-01 05:51:23.946 # computing similarity is expensive, so use the quick
2025-07-01 05:51:23.956 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:23.965 # compares by a factor of 3.
2025-07-01 05:51:23.972 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:23.978 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:23.984 # of the computation is cached by cruncher
2025-07-01 05:51:23.991 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:24.004 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:24.015 cruncher.ratio() > best_ratio:
2025-07-01 05:51:24.022 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:24.029 if best_ratio < cutoff:
2025-07-01 05:51:24.033 # no non-identical "pretty close" pair
2025-07-01 05:51:24.038 if eqi is None:
2025-07-01 05:51:24.043 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:24.049 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:24.057 return
2025-07-01 05:51:24.069 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:24.080 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:24.091 else:
2025-07-01 05:51:24.102 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:24.113 eqi = None
2025-07-01 05:51:24.125
2025-07-01 05:51:24.135 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:24.143 # identical
2025-07-01 05:51:24.151
2025-07-01 05:51:24.158 # pump out diffs from before the synch point
2025-07-01 05:51:24.170 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:24.181
2025-07-01 05:51:24.190 # do intraline marking on the synch pair
2025-07-01 05:51:24.197 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:24.204 if eqi is None:
2025-07-01 05:51:24.210 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:24.217 atags = btags = ""
2025-07-01 05:51:24.222 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:24.228 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:24.234 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:24.241 if tag == 'replace':
2025-07-01 05:51:24.248 atags += '^' * la
2025-07-01 05:51:24.254 btags += '^' * lb
2025-07-01 05:51:24.261 elif tag == 'delete':
2025-07-01 05:51:24.268 atags += '-' * la
2025-07-01 05:51:24.275 elif tag == 'insert':
2025-07-01 05:51:24.283 btags += '+' * lb
2025-07-01 05:51:24.294 elif tag == 'equal':
2025-07-01 05:51:24.302 atags += ' ' * la
2025-07-01 05:51:24.308 btags += ' ' * lb
2025-07-01 05:51:24.314 else:
2025-07-01 05:51:24.321 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:24.328 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:24.335 else:
2025-07-01 05:51:24.342 # the synch pair is identical
2025-07-01 05:51:24.351 yield ' ' + aelt
2025-07-01 05:51:24.360
2025-07-01 05:51:24.371 # pump out diffs from after the synch point
2025-07-01 05:51:24.382 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:24.393
2025-07-01 05:51:24.403 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:24.409 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:24.415
2025-07-01 05:51:24.421 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:24.432 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:24.443 alo = 503, ahi = 1101
2025-07-01 05:51:24.454 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:24.464 blo = 503, bhi = 1101
2025-07-01 05:51:24.471
2025-07-01 05:51:24.479 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:24.493 g = []
2025-07-01 05:51:24.504 if alo < ahi:
2025-07-01 05:51:24.512 if blo < bhi:
2025-07-01 05:51:24.521 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:24.529 else:
2025-07-01 05:51:24.535 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:24.541 elif blo < bhi:
2025-07-01 05:51:24.546 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:24.555
2025-07-01 05:51:24.564 > yield from g
2025-07-01 05:51:24.570
2025-07-01 05:51:24.578 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:24.586 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:24.596
2025-07-01 05:51:24.605 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:24.613 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:24.620 alo = 503, ahi = 1101
2025-07-01 05:51:24.628 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:24.635 blo = 503, bhi = 1101
2025-07-01 05:51:24.643
2025-07-01 05:51:24.654 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:24.662 r"""
2025-07-01 05:51:24.674 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:24.686 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:24.695 synch point, and intraline difference marking is done on the
2025-07-01 05:51:24.702 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:24.708
2025-07-01 05:51:24.713 Example:
2025-07-01 05:51:24.719
2025-07-01 05:51:24.724 >>> d = Differ()
2025-07-01 05:51:24.730 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:24.736 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:24.741 >>> print(''.join(results), end="")
2025-07-01 05:51:24.747 - abcDefghiJkl
2025-07-01 05:51:24.758 + abcdefGhijkl
2025-07-01 05:51:24.771 """
2025-07-01 05:51:24.779
2025-07-01 05:51:24.788 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:24.795 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:24.801 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:24.809 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:24.817 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:24.823
2025-07-01 05:51:24.830 # search for the pair that matches best without being identical
2025-07-01 05:51:24.837 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:24.844 # on junk -- unless we have to)
2025-07-01 05:51:24.850 for j in range(blo, bhi):
2025-07-01 05:51:24.857 bj = b[j]
2025-07-01 05:51:24.863 cruncher.set_seq2(bj)
2025-07-01 05:51:24.870 for i in range(alo, ahi):
2025-07-01 05:51:24.880 ai = a[i]
2025-07-01 05:51:24.888 if ai == bj:
2025-07-01 05:51:24.894 if eqi is None:
2025-07-01 05:51:24.900 eqi, eqj = i, j
2025-07-01 05:51:24.905 continue
2025-07-01 05:51:24.911 cruncher.set_seq1(ai)
2025-07-01 05:51:24.918 # computing similarity is expensive, so use the quick
2025-07-01 05:51:24.928 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:24.937 # compares by a factor of 3.
2025-07-01 05:51:24.945 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:24.951 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:24.958 # of the computation is cached by cruncher
2025-07-01 05:51:24.964 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:24.969 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:24.974 cruncher.ratio() > best_ratio:
2025-07-01 05:51:24.979 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:24.984 if best_ratio < cutoff:
2025-07-01 05:51:24.991 # no non-identical "pretty close" pair
2025-07-01 05:51:24.997 if eqi is None:
2025-07-01 05:51:25.010 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:25.021 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:25.031 return
2025-07-01 05:51:25.038 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:25.044 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:25.051 else:
2025-07-01 05:51:25.057 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:25.065 eqi = None
2025-07-01 05:51:25.071
2025-07-01 05:51:25.078 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:25.084 # identical
2025-07-01 05:51:25.091
2025-07-01 05:51:25.102 # pump out diffs from before the synch point
2025-07-01 05:51:25.111 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:25.117
2025-07-01 05:51:25.124 # do intraline marking on the synch pair
2025-07-01 05:51:25.131 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:25.138 if eqi is None:
2025-07-01 05:51:25.143 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:25.149 atags = btags = ""
2025-07-01 05:51:25.155 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:25.161 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:25.166 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:25.172 if tag == 'replace':
2025-07-01 05:51:25.177 atags += '^' * la
2025-07-01 05:51:25.183 btags += '^' * lb
2025-07-01 05:51:25.189 elif tag == 'delete':
2025-07-01 05:51:25.195 atags += '-' * la
2025-07-01 05:51:25.201 elif tag == 'insert':
2025-07-01 05:51:25.210 btags += '+' * lb
2025-07-01 05:51:25.216 elif tag == 'equal':
2025-07-01 05:51:25.222 atags += ' ' * la
2025-07-01 05:51:25.228 btags += ' ' * lb
2025-07-01 05:51:25.234 else:
2025-07-01 05:51:25.242 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:25.254 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:25.263 else:
2025-07-01 05:51:25.270 # the synch pair is identical
2025-07-01 05:51:25.277 yield ' ' + aelt
2025-07-01 05:51:25.283
2025-07-01 05:51:25.289 # pump out diffs from after the synch point
2025-07-01 05:51:25.294 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:25.299
2025-07-01 05:51:25.303 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:25.308 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:25.314
2025-07-01 05:51:25.323 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:25.331 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:25.343 alo = 504, ahi = 1101
2025-07-01 05:51:25.356 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:25.367 blo = 504, bhi = 1101
2025-07-01 05:51:25.378
2025-07-01 05:51:25.389 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:25.398 g = []
2025-07-01 05:51:25.409 if alo < ahi:
2025-07-01 05:51:25.419 if blo < bhi:
2025-07-01 05:51:25.427 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:25.438 else:
2025-07-01 05:51:25.447 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:25.455 elif blo < bhi:
2025-07-01 05:51:25.463 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:25.474
2025-07-01 05:51:25.483 > yield from g
2025-07-01 05:51:25.491
2025-07-01 05:51:25.497 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:25.509 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:25.518
2025-07-01 05:51:25.527 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:25.537 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:25.547 alo = 504, ahi = 1101
2025-07-01 05:51:25.558 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:25.566 blo = 504, bhi = 1101
2025-07-01 05:51:25.572
2025-07-01 05:51:25.583 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:25.592 r"""
2025-07-01 05:51:25.600 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:25.607 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:25.614 synch point, and intraline difference marking is done on the
2025-07-01 05:51:25.624 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:25.634
2025-07-01 05:51:25.642 Example:
2025-07-01 05:51:25.651
2025-07-01 05:51:25.660 >>> d = Differ()
2025-07-01 05:51:25.667 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:25.673 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:25.678 >>> print(''.join(results), end="")
2025-07-01 05:51:25.682 - abcDefghiJkl
2025-07-01 05:51:25.691 + abcdefGhijkl
2025-07-01 05:51:25.700 """
2025-07-01 05:51:25.704
2025-07-01 05:51:25.709 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:25.713 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:25.718 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:25.722 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:25.728 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:25.735
2025-07-01 05:51:25.742 # search for the pair that matches best without being identical
2025-07-01 05:51:25.753 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:25.762 # on junk -- unless we have to)
2025-07-01 05:51:25.772 for j in range(blo, bhi):
2025-07-01 05:51:25.785 bj = b[j]
2025-07-01 05:51:25.797 cruncher.set_seq2(bj)
2025-07-01 05:51:25.807 for i in range(alo, ahi):
2025-07-01 05:51:25.816 ai = a[i]
2025-07-01 05:51:25.822 if ai == bj:
2025-07-01 05:51:25.829 if eqi is None:
2025-07-01 05:51:25.834 eqi, eqj = i, j
2025-07-01 05:51:25.840 continue
2025-07-01 05:51:25.848 cruncher.set_seq1(ai)
2025-07-01 05:51:25.859 # computing similarity is expensive, so use the quick
2025-07-01 05:51:25.868 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:25.876 # compares by a factor of 3.
2025-07-01 05:51:25.889 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:25.899 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:25.910 # of the computation is cached by cruncher
2025-07-01 05:51:25.920 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:25.930 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:25.938 cruncher.ratio() > best_ratio:
2025-07-01 05:51:25.945 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:25.951 if best_ratio < cutoff:
2025-07-01 05:51:25.957 # no non-identical "pretty close" pair
2025-07-01 05:51:25.962 if eqi is None:
2025-07-01 05:51:25.968 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:25.974 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:25.980 return
2025-07-01 05:51:25.986 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:25.994 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:26.002 else:
2025-07-01 05:51:26.009 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:26.016 eqi = None
2025-07-01 05:51:26.022
2025-07-01 05:51:26.030 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:26.039 # identical
2025-07-01 05:51:26.045
2025-07-01 05:51:26.051 # pump out diffs from before the synch point
2025-07-01 05:51:26.058 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:26.069
2025-07-01 05:51:26.077 # do intraline marking on the synch pair
2025-07-01 05:51:26.087 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:26.092 if eqi is None:
2025-07-01 05:51:26.098 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:26.103 atags = btags = ""
2025-07-01 05:51:26.108 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:26.114 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:26.123 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:26.133 if tag == 'replace':
2025-07-01 05:51:26.141 atags += '^' * la
2025-07-01 05:51:26.150 btags += '^' * lb
2025-07-01 05:51:26.161 elif tag == 'delete':
2025-07-01 05:51:26.171 atags += '-' * la
2025-07-01 05:51:26.178 elif tag == 'insert':
2025-07-01 05:51:26.185 btags += '+' * lb
2025-07-01 05:51:26.191 elif tag == 'equal':
2025-07-01 05:51:26.199 atags += ' ' * la
2025-07-01 05:51:26.211 btags += ' ' * lb
2025-07-01 05:51:26.220 else:
2025-07-01 05:51:26.226 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:26.233 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:26.238 else:
2025-07-01 05:51:26.245 # the synch pair is identical
2025-07-01 05:51:26.252 yield ' ' + aelt
2025-07-01 05:51:26.259
2025-07-01 05:51:26.269 # pump out diffs from after the synch point
2025-07-01 05:51:26.281 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:26.291
2025-07-01 05:51:26.299 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:26.307 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:26.313
2025-07-01 05:51:26.320 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:26.327 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:26.339 alo = 505, ahi = 1101
2025-07-01 05:51:26.350 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:26.360 blo = 505, bhi = 1101
2025-07-01 05:51:26.371
2025-07-01 05:51:26.380 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:26.387 g = []
2025-07-01 05:51:26.393 if alo < ahi:
2025-07-01 05:51:26.399 if blo < bhi:
2025-07-01 05:51:26.405 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:26.413 else:
2025-07-01 05:51:26.420 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:26.427 elif blo < bhi:
2025-07-01 05:51:26.434 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:26.447
2025-07-01 05:51:26.458 > yield from g
2025-07-01 05:51:26.467
2025-07-01 05:51:26.475 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:26.482 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:26.488
2025-07-01 05:51:26.499 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:26.508 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:26.519 alo = 505, ahi = 1101
2025-07-01 05:51:26.532 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:26.544 blo = 505, bhi = 1101
2025-07-01 05:51:26.553
2025-07-01 05:51:26.561 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:26.567 r"""
2025-07-01 05:51:26.575 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:26.587 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:26.599 synch point, and intraline difference marking is done on the
2025-07-01 05:51:26.610 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:26.623
2025-07-01 05:51:26.633 Example:
2025-07-01 05:51:26.643
2025-07-01 05:51:26.654 >>> d = Differ()
2025-07-01 05:51:26.663 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:26.670 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:26.675 >>> print(''.join(results), end="")
2025-07-01 05:51:26.680 - abcDefghiJkl
2025-07-01 05:51:26.690 + abcdefGhijkl
2025-07-01 05:51:26.706 """
2025-07-01 05:51:26.714
2025-07-01 05:51:26.720 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:26.726 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:26.731 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:26.735 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:26.740 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:26.744
2025-07-01 05:51:26.751 # search for the pair that matches best without being identical
2025-07-01 05:51:26.761 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:26.769 # on junk -- unless we have to)
2025-07-01 05:51:26.777 for j in range(blo, bhi):
2025-07-01 05:51:26.784 bj = b[j]
2025-07-01 05:51:26.791 cruncher.set_seq2(bj)
2025-07-01 05:51:26.802 for i in range(alo, ahi):
2025-07-01 05:51:26.810 ai = a[i]
2025-07-01 05:51:26.820 if ai == bj:
2025-07-01 05:51:26.833 if eqi is None:
2025-07-01 05:51:26.844 eqi, eqj = i, j
2025-07-01 05:51:26.853 continue
2025-07-01 05:51:26.861 cruncher.set_seq1(ai)
2025-07-01 05:51:26.873 # computing similarity is expensive, so use the quick
2025-07-01 05:51:26.882 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:26.893 # compares by a factor of 3.
2025-07-01 05:51:26.905 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:26.916 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:26.924 # of the computation is cached by cruncher
2025-07-01 05:51:26.933 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:26.940 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:26.947 cruncher.ratio() > best_ratio:
2025-07-01 05:51:26.953 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:26.964 if best_ratio < cutoff:
2025-07-01 05:51:26.973 # no non-identical "pretty close" pair
2025-07-01 05:51:26.981 if eqi is None:
2025-07-01 05:51:26.988 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:26.995 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:27.001 return
2025-07-01 05:51:27.007 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:27.020 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:27.029 else:
2025-07-01 05:51:27.035 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:27.041 eqi = None
2025-07-01 05:51:27.045
2025-07-01 05:51:27.050 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:27.054 # identical
2025-07-01 05:51:27.058
2025-07-01 05:51:27.063 # pump out diffs from before the synch point
2025-07-01 05:51:27.067 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:27.072
2025-07-01 05:51:27.076 # do intraline marking on the synch pair
2025-07-01 05:51:27.083 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:27.097 if eqi is None:
2025-07-01 05:51:27.106 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:27.117 atags = btags = ""
2025-07-01 05:51:27.127 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:27.135 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:27.143 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:27.151 if tag == 'replace':
2025-07-01 05:51:27.158 atags += '^' * la
2025-07-01 05:51:27.168 btags += '^' * lb
2025-07-01 05:51:27.179 elif tag == 'delete':
2025-07-01 05:51:27.186 atags += '-' * la
2025-07-01 05:51:27.192 elif tag == 'insert':
2025-07-01 05:51:27.198 btags += '+' * lb
2025-07-01 05:51:27.206 elif tag == 'equal':
2025-07-01 05:51:27.213 atags += ' ' * la
2025-07-01 05:51:27.220 btags += ' ' * lb
2025-07-01 05:51:27.227 else:
2025-07-01 05:51:27.235 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:27.240 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:27.245 else:
2025-07-01 05:51:27.249 # the synch pair is identical
2025-07-01 05:51:27.254 yield ' ' + aelt
2025-07-01 05:51:27.258
2025-07-01 05:51:27.262 # pump out diffs from after the synch point
2025-07-01 05:51:27.268 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:27.275
2025-07-01 05:51:27.286 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:27.296 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:27.304
2025-07-01 05:51:27.314 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:27.327 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:27.337 alo = 506, ahi = 1101
2025-07-01 05:51:27.346 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:27.355 blo = 506, bhi = 1101
2025-07-01 05:51:27.368
2025-07-01 05:51:27.376 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:27.382 g = []
2025-07-01 05:51:27.391 if alo < ahi:
2025-07-01 05:51:27.402 if blo < bhi:
2025-07-01 05:51:27.410 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:27.419 else:
2025-07-01 05:51:27.426 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:27.437 elif blo < bhi:
2025-07-01 05:51:27.448 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:27.457
2025-07-01 05:51:27.464 > yield from g
2025-07-01 05:51:27.471
2025-07-01 05:51:27.476 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:27.482 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:27.487
2025-07-01 05:51:27.492 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:27.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:27.507 alo = 506, ahi = 1101
2025-07-01 05:51:27.521 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:27.529 blo = 506, bhi = 1101
2025-07-01 05:51:27.536
2025-07-01 05:51:27.542 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:27.546 r"""
2025-07-01 05:51:27.551 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:27.556 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:27.560 synch point, and intraline difference marking is done on the
2025-07-01 05:51:27.565 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:27.571
2025-07-01 05:51:27.576 Example:
2025-07-01 05:51:27.582
2025-07-01 05:51:27.587 >>> d = Differ()
2025-07-01 05:51:27.593 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:27.600 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:27.607 >>> print(''.join(results), end="")
2025-07-01 05:51:27.614 - abcDefghiJkl
2025-07-01 05:51:27.627 + abcdefGhijkl
2025-07-01 05:51:27.640 """
2025-07-01 05:51:27.646
2025-07-01 05:51:27.657 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:27.666 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:27.673 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:27.678 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:27.684 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:27.690
2025-07-01 05:51:27.697 # search for the pair that matches best without being identical
2025-07-01 05:51:27.703 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:27.710 # on junk -- unless we have to)
2025-07-01 05:51:27.719 for j in range(blo, bhi):
2025-07-01 05:51:27.729 bj = b[j]
2025-07-01 05:51:27.736 cruncher.set_seq2(bj)
2025-07-01 05:51:27.743 for i in range(alo, ahi):
2025-07-01 05:51:27.748 ai = a[i]
2025-07-01 05:51:27.752 if ai == bj:
2025-07-01 05:51:27.758 if eqi is None:
2025-07-01 05:51:27.769 eqi, eqj = i, j
2025-07-01 05:51:27.778 continue
2025-07-01 05:51:27.785 cruncher.set_seq1(ai)
2025-07-01 05:51:27.791 # computing similarity is expensive, so use the quick
2025-07-01 05:51:27.796 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:27.802 # compares by a factor of 3.
2025-07-01 05:51:27.808 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:27.813 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:27.819 # of the computation is cached by cruncher
2025-07-01 05:51:27.825 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:27.831 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:27.837 cruncher.ratio() > best_ratio:
2025-07-01 05:51:27.847 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:27.856 if best_ratio < cutoff:
2025-07-01 05:51:27.863 # no non-identical "pretty close" pair
2025-07-01 05:51:27.871 if eqi is None:
2025-07-01 05:51:27.883 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:27.892 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:27.898 return
2025-07-01 05:51:27.907 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:27.916 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:27.923 else:
2025-07-01 05:51:27.928 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:27.935 eqi = None
2025-07-01 05:51:27.945
2025-07-01 05:51:27.955 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:27.963 # identical
2025-07-01 05:51:27.972
2025-07-01 05:51:27.982 # pump out diffs from before the synch point
2025-07-01 05:51:27.989 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:27.996
2025-07-01 05:51:28.003 # do intraline marking on the synch pair
2025-07-01 05:51:28.011 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:28.017 if eqi is None:
2025-07-01 05:51:28.022 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:28.028 atags = btags = ""
2025-07-01 05:51:28.034 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:28.040 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:28.046 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:28.052 if tag == 'replace':
2025-07-01 05:51:28.058 atags += '^' * la
2025-07-01 05:51:28.063 btags += '^' * lb
2025-07-01 05:51:28.068 elif tag == 'delete':
2025-07-01 05:51:28.074 atags += '-' * la
2025-07-01 05:51:28.079 elif tag == 'insert':
2025-07-01 05:51:28.086 btags += '+' * lb
2025-07-01 05:51:28.097 elif tag == 'equal':
2025-07-01 05:51:28.105 atags += ' ' * la
2025-07-01 05:51:28.113 btags += ' ' * lb
2025-07-01 05:51:28.122 else:
2025-07-01 05:51:28.132 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:28.143 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:28.154 else:
2025-07-01 05:51:28.163 # the synch pair is identical
2025-07-01 05:51:28.171 yield ' ' + aelt
2025-07-01 05:51:28.178
2025-07-01 05:51:28.185 # pump out diffs from after the synch point
2025-07-01 05:51:28.191 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:28.196
2025-07-01 05:51:28.200 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:28.206 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:28.210
2025-07-01 05:51:28.214 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:28.219 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:28.231 alo = 507, ahi = 1101
2025-07-01 05:51:28.242 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:28.253 blo = 507, bhi = 1101
2025-07-01 05:51:28.263
2025-07-01 05:51:28.271 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:28.278 g = []
2025-07-01 05:51:28.284 if alo < ahi:
2025-07-01 05:51:28.291 if blo < bhi:
2025-07-01 05:51:28.298 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:28.309 else:
2025-07-01 05:51:28.318 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:28.325 elif blo < bhi:
2025-07-01 05:51:28.339 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:28.348
2025-07-01 05:51:28.354 > yield from g
2025-07-01 05:51:28.360
2025-07-01 05:51:28.368 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:28.375 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:28.381
2025-07-01 05:51:28.387 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:28.395 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:28.401 alo = 507, ahi = 1101
2025-07-01 05:51:28.409 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:28.414 blo = 507, bhi = 1101
2025-07-01 05:51:28.425
2025-07-01 05:51:28.435 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:28.442 r"""
2025-07-01 05:51:28.452 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:28.463 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:28.473 synch point, and intraline difference marking is done on the
2025-07-01 05:51:28.481 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:28.492
2025-07-01 05:51:28.502 Example:
2025-07-01 05:51:28.510
2025-07-01 05:51:28.517 >>> d = Differ()
2025-07-01 05:51:28.524 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:28.531 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:28.536 >>> print(''.join(results), end="")
2025-07-01 05:51:28.542 - abcDefghiJkl
2025-07-01 05:51:28.567 + abcdefGhijkl
2025-07-01 05:51:28.583 """
2025-07-01 05:51:28.588
2025-07-01 05:51:28.594 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:28.599 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:28.604 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:28.616 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:28.627 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:28.637
2025-07-01 05:51:28.647 # search for the pair that matches best without being identical
2025-07-01 05:51:28.656 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:28.664 # on junk -- unless we have to)
2025-07-01 05:51:28.675 for j in range(blo, bhi):
2025-07-01 05:51:28.683 bj = b[j]
2025-07-01 05:51:28.693 cruncher.set_seq2(bj)
2025-07-01 05:51:28.703 for i in range(alo, ahi):
2025-07-01 05:51:28.712 ai = a[i]
2025-07-01 05:51:28.719 if ai == bj:
2025-07-01 05:51:28.728 if eqi is None:
2025-07-01 05:51:28.740 eqi, eqj = i, j
2025-07-01 05:51:28.752 continue
2025-07-01 05:51:28.765 cruncher.set_seq1(ai)
2025-07-01 05:51:28.773 # computing similarity is expensive, so use the quick
2025-07-01 05:51:28.784 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:28.792 # compares by a factor of 3.
2025-07-01 05:51:28.801 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:28.809 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:28.816 # of the computation is cached by cruncher
2025-07-01 05:51:28.821 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:28.827 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:28.833 cruncher.ratio() > best_ratio:
2025-07-01 05:51:28.839 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:28.845 if best_ratio < cutoff:
2025-07-01 05:51:28.851 # no non-identical "pretty close" pair
2025-07-01 05:51:28.859 if eqi is None:
2025-07-01 05:51:28.867 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:28.878 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:28.886 return
2025-07-01 05:51:28.891 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:28.896 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:28.901 else:
2025-07-01 05:51:28.907 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:28.914 eqi = None
2025-07-01 05:51:28.920
2025-07-01 05:51:28.928 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:28.934 # identical
2025-07-01 05:51:28.940
2025-07-01 05:51:28.948 # pump out diffs from before the synch point
2025-07-01 05:51:28.955 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:28.962
2025-07-01 05:51:28.968 # do intraline marking on the synch pair
2025-07-01 05:51:28.978 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:28.990 if eqi is None:
2025-07-01 05:51:28.999 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:29.008 atags = btags = ""
2025-07-01 05:51:29.018 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:29.029 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:29.038 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:29.043 if tag == 'replace':
2025-07-01 05:51:29.049 atags += '^' * la
2025-07-01 05:51:29.055 btags += '^' * lb
2025-07-01 05:51:29.062 elif tag == 'delete':
2025-07-01 05:51:29.072 atags += '-' * la
2025-07-01 05:51:29.083 elif tag == 'insert':
2025-07-01 05:51:29.092 btags += '+' * lb
2025-07-01 05:51:29.100 elif tag == 'equal':
2025-07-01 05:51:29.106 atags += ' ' * la
2025-07-01 05:51:29.112 btags += ' ' * lb
2025-07-01 05:51:29.117 else:
2025-07-01 05:51:29.123 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:29.130 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:29.136 else:
2025-07-01 05:51:29.142 # the synch pair is identical
2025-07-01 05:51:29.152 yield ' ' + aelt
2025-07-01 05:51:29.162
2025-07-01 05:51:29.170 # pump out diffs from after the synch point
2025-07-01 05:51:29.176 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:29.181
2025-07-01 05:51:29.186 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:29.191 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:29.195
2025-07-01 05:51:29.200 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:29.205 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:29.211 alo = 510, ahi = 1101
2025-07-01 05:51:29.217 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:29.222 blo = 510, bhi = 1101
2025-07-01 05:51:29.228
2025-07-01 05:51:29.234 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:29.240 g = []
2025-07-01 05:51:29.246 if alo < ahi:
2025-07-01 05:51:29.252 if blo < bhi:
2025-07-01 05:51:29.259 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:29.266 else:
2025-07-01 05:51:29.273 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:29.279 elif blo < bhi:
2025-07-01 05:51:29.287 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:29.294
2025-07-01 05:51:29.303 > yield from g
2025-07-01 05:51:29.309
2025-07-01 05:51:29.315 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:29.322 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:29.334
2025-07-01 05:51:29.344 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:29.353 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:29.360 alo = 510, ahi = 1101
2025-07-01 05:51:29.370 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:29.378 blo = 510, bhi = 1101
2025-07-01 05:51:29.384
2025-07-01 05:51:29.391 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:29.398 r"""
2025-07-01 05:51:29.405 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:29.411 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:29.416 synch point, and intraline difference marking is done on the
2025-07-01 05:51:29.421 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:29.429
2025-07-01 05:51:29.437 Example:
2025-07-01 05:51:29.443
2025-07-01 05:51:29.448 >>> d = Differ()
2025-07-01 05:51:29.453 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:29.461 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:29.468 >>> print(''.join(results), end="")
2025-07-01 05:51:29.477 - abcDefghiJkl
2025-07-01 05:51:29.499 + abcdefGhijkl
2025-07-01 05:51:29.512 """
2025-07-01 05:51:29.519
2025-07-01 05:51:29.527 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:29.534 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:29.541 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:29.548 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:29.555 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:29.562
2025-07-01 05:51:29.569 # search for the pair that matches best without being identical
2025-07-01 05:51:29.581 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:29.590 # on junk -- unless we have to)
2025-07-01 05:51:29.597 for j in range(blo, bhi):
2025-07-01 05:51:29.603 bj = b[j]
2025-07-01 05:51:29.610 cruncher.set_seq2(bj)
2025-07-01 05:51:29.617 for i in range(alo, ahi):
2025-07-01 05:51:29.624 ai = a[i]
2025-07-01 05:51:29.631 if ai == bj:
2025-07-01 05:51:29.637 if eqi is None:
2025-07-01 05:51:29.641 eqi, eqj = i, j
2025-07-01 05:51:29.646 continue
2025-07-01 05:51:29.654 cruncher.set_seq1(ai)
2025-07-01 05:51:29.666 # computing similarity is expensive, so use the quick
2025-07-01 05:51:29.675 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:29.683 # compares by a factor of 3.
2025-07-01 05:51:29.695 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:29.710 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:29.719 # of the computation is cached by cruncher
2025-07-01 05:51:29.727 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:29.736 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:29.743 cruncher.ratio() > best_ratio:
2025-07-01 05:51:29.752 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:29.764 if best_ratio < cutoff:
2025-07-01 05:51:29.776 # no non-identical "pretty close" pair
2025-07-01 05:51:29.789 if eqi is None:
2025-07-01 05:51:29.799 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:29.807 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:29.821 return
2025-07-01 05:51:29.832 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:29.845 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:29.858 else:
2025-07-01 05:51:29.872 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:29.883 eqi = None
2025-07-01 05:51:29.893
2025-07-01 05:51:29.905 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:29.916 # identical
2025-07-01 05:51:29.927
2025-07-01 05:51:29.941 # pump out diffs from before the synch point
2025-07-01 05:51:29.952 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:29.965
2025-07-01 05:51:29.975 # do intraline marking on the synch pair
2025-07-01 05:51:29.984 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:29.992 if eqi is None:
2025-07-01 05:51:30.007 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:30.018 atags = btags = ""
2025-07-01 05:51:30.026 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:30.037 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:30.045 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:30.055 if tag == 'replace':
2025-07-01 05:51:30.066 atags += '^' * la
2025-07-01 05:51:30.079 btags += '^' * lb
2025-07-01 05:51:30.090 elif tag == 'delete':
2025-07-01 05:51:30.099 atags += '-' * la
2025-07-01 05:51:30.107 elif tag == 'insert':
2025-07-01 05:51:30.119 btags += '+' * lb
2025-07-01 05:51:30.129 elif tag == 'equal':
2025-07-01 05:51:30.136 atags += ' ' * la
2025-07-01 05:51:30.147 btags += ' ' * lb
2025-07-01 05:51:30.157 else:
2025-07-01 05:51:30.166 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:30.175 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:30.182 else:
2025-07-01 05:51:30.191 # the synch pair is identical
2025-07-01 05:51:30.199 yield ' ' + aelt
2025-07-01 05:51:30.205
2025-07-01 05:51:30.218 # pump out diffs from after the synch point
2025-07-01 05:51:30.229 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:30.239
2025-07-01 05:51:30.249 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:30.257 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:30.263
2025-07-01 05:51:30.269 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:30.276 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:30.283 alo = 511, ahi = 1101
2025-07-01 05:51:30.293 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:30.301 blo = 511, bhi = 1101
2025-07-01 05:51:30.307
2025-07-01 05:51:30.315 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:30.325 g = []
2025-07-01 05:51:30.336 if alo < ahi:
2025-07-01 05:51:30.345 if blo < bhi:
2025-07-01 05:51:30.356 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:30.366 else:
2025-07-01 05:51:30.375 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:30.383 elif blo < bhi:
2025-07-01 05:51:30.394 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:30.403
2025-07-01 05:51:30.411 > yield from g
2025-07-01 05:51:30.423
2025-07-01 05:51:30.435 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:30.445 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:30.453
2025-07-01 05:51:30.459 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:30.467 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:30.475 alo = 511, ahi = 1101
2025-07-01 05:51:30.486 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:30.494 blo = 511, bhi = 1101
2025-07-01 05:51:30.500
2025-07-01 05:51:30.506 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:30.515 r"""
2025-07-01 05:51:30.527 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:30.540 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:30.552 synch point, and intraline difference marking is done on the
2025-07-01 05:51:30.564 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:30.572
2025-07-01 05:51:30.579 Example:
2025-07-01 05:51:30.585
2025-07-01 05:51:30.597 >>> d = Differ()
2025-07-01 05:51:30.607 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:30.618 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:30.627 >>> print(''.join(results), end="")
2025-07-01 05:51:30.639 - abcDefghiJkl
2025-07-01 05:51:30.657 + abcdefGhijkl
2025-07-01 05:51:30.670 """
2025-07-01 05:51:30.677
2025-07-01 05:51:30.685 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:30.692 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:30.700 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:30.708 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:30.716 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:30.724
2025-07-01 05:51:30.732 # search for the pair that matches best without being identical
2025-07-01 05:51:30.741 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:30.749 # on junk -- unless we have to)
2025-07-01 05:51:30.757 for j in range(blo, bhi):
2025-07-01 05:51:30.763 bj = b[j]
2025-07-01 05:51:30.768 cruncher.set_seq2(bj)
2025-07-01 05:51:30.774 for i in range(alo, ahi):
2025-07-01 05:51:30.784 ai = a[i]
2025-07-01 05:51:30.793 if ai == bj:
2025-07-01 05:51:30.799 if eqi is None:
2025-07-01 05:51:30.805 eqi, eqj = i, j
2025-07-01 05:51:30.811 continue
2025-07-01 05:51:30.818 cruncher.set_seq1(ai)
2025-07-01 05:51:30.829 # computing similarity is expensive, so use the quick
2025-07-01 05:51:30.841 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:30.853 # compares by a factor of 3.
2025-07-01 05:51:30.860 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:30.866 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:30.872 # of the computation is cached by cruncher
2025-07-01 05:51:30.878 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:30.888 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:30.900 cruncher.ratio() > best_ratio:
2025-07-01 05:51:30.908 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:30.921 if best_ratio < cutoff:
2025-07-01 05:51:30.931 # no non-identical "pretty close" pair
2025-07-01 05:51:30.942 if eqi is None:
2025-07-01 05:51:30.950 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:30.957 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:30.964 return
2025-07-01 05:51:30.970 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:30.981 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:30.988 else:
2025-07-01 05:51:30.994 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:31.001 eqi = None
2025-07-01 05:51:31.009
2025-07-01 05:51:31.016 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:31.025 # identical
2025-07-01 05:51:31.040
2025-07-01 05:51:31.048 # pump out diffs from before the synch point
2025-07-01 05:51:31.055 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:31.062
2025-07-01 05:51:31.068 # do intraline marking on the synch pair
2025-07-01 05:51:31.073 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:31.078 if eqi is None:
2025-07-01 05:51:31.083 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:31.088 atags = btags = ""
2025-07-01 05:51:31.093 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:31.097 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:31.105 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:31.112 if tag == 'replace':
2025-07-01 05:51:31.118 atags += '^' * la
2025-07-01 05:51:31.125 btags += '^' * lb
2025-07-01 05:51:31.134 elif tag == 'delete':
2025-07-01 05:51:31.143 atags += '-' * la
2025-07-01 05:51:31.152 elif tag == 'insert':
2025-07-01 05:51:31.160 btags += '+' * lb
2025-07-01 05:51:31.166 elif tag == 'equal':
2025-07-01 05:51:31.172 atags += ' ' * la
2025-07-01 05:51:31.178 btags += ' ' * lb
2025-07-01 05:51:31.183 else:
2025-07-01 05:51:31.188 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:31.193 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:31.199 else:
2025-07-01 05:51:31.204 # the synch pair is identical
2025-07-01 05:51:31.210 yield ' ' + aelt
2025-07-01 05:51:31.220
2025-07-01 05:51:31.229 # pump out diffs from after the synch point
2025-07-01 05:51:31.236 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:31.243
2025-07-01 05:51:31.250 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:31.256 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:31.262
2025-07-01 05:51:31.268 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:31.275 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:31.281 alo = 512, ahi = 1101
2025-07-01 05:51:31.291 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:31.299 blo = 512, bhi = 1101
2025-07-01 05:51:31.310
2025-07-01 05:51:31.318 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:31.324 g = []
2025-07-01 05:51:31.329 if alo < ahi:
2025-07-01 05:51:31.335 if blo < bhi:
2025-07-01 05:51:31.341 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:31.347 else:
2025-07-01 05:51:31.354 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:31.367 elif blo < bhi:
2025-07-01 05:51:31.377 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:31.385
2025-07-01 05:51:31.392 > yield from g
2025-07-01 05:51:31.399
2025-07-01 05:51:31.406 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:31.413 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:31.419
2025-07-01 05:51:31.425 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:31.432 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:31.437 alo = 512, ahi = 1101
2025-07-01 05:51:31.445 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:31.451 blo = 512, bhi = 1101
2025-07-01 05:51:31.458
2025-07-01 05:51:31.469 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:31.478 r"""
2025-07-01 05:51:31.485 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:31.491 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:31.497 synch point, and intraline difference marking is done on the
2025-07-01 05:51:31.502 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:31.506
2025-07-01 05:51:31.511 Example:
2025-07-01 05:51:31.516
2025-07-01 05:51:31.521 >>> d = Differ()
2025-07-01 05:51:31.526 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:31.530 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:31.536 >>> print(''.join(results), end="")
2025-07-01 05:51:31.542 - abcDefghiJkl
2025-07-01 05:51:31.553 + abcdefGhijkl
2025-07-01 05:51:31.566 """
2025-07-01 05:51:31.575
2025-07-01 05:51:31.587 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:31.596 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:31.605 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:31.615 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:31.626 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:31.635
2025-07-01 05:51:31.648 # search for the pair that matches best without being identical
2025-07-01 05:51:31.661 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:31.670 # on junk -- unless we have to)
2025-07-01 05:51:31.677 for j in range(blo, bhi):
2025-07-01 05:51:31.683 bj = b[j]
2025-07-01 05:51:31.689 cruncher.set_seq2(bj)
2025-07-01 05:51:31.696 for i in range(alo, ahi):
2025-07-01 05:51:31.702 ai = a[i]
2025-07-01 05:51:31.709 if ai == bj:
2025-07-01 05:51:31.715 if eqi is None:
2025-07-01 05:51:31.722 eqi, eqj = i, j
2025-07-01 05:51:31.732 continue
2025-07-01 05:51:31.742 cruncher.set_seq1(ai)
2025-07-01 05:51:31.749 # computing similarity is expensive, so use the quick
2025-07-01 05:51:31.755 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:31.760 # compares by a factor of 3.
2025-07-01 05:51:31.767 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:31.774 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:31.781 # of the computation is cached by cruncher
2025-07-01 05:51:31.792 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:31.799 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:31.805 cruncher.ratio() > best_ratio:
2025-07-01 05:51:31.811 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:31.816 if best_ratio < cutoff:
2025-07-01 05:51:31.824 # no non-identical "pretty close" pair
2025-07-01 05:51:31.834 if eqi is None:
2025-07-01 05:51:31.840 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:31.847 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:31.858 return
2025-07-01 05:51:31.869 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:31.880 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:31.889 else:
2025-07-01 05:51:31.896 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:31.903 eqi = None
2025-07-01 05:51:31.908
2025-07-01 05:51:31.915 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:31.920 # identical
2025-07-01 05:51:31.927
2025-07-01 05:51:31.933 # pump out diffs from before the synch point
2025-07-01 05:51:31.938 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:31.949
2025-07-01 05:51:31.958 # do intraline marking on the synch pair
2025-07-01 05:51:31.967 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:31.977 if eqi is None:
2025-07-01 05:51:31.988 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:32.000 atags = btags = ""
2025-07-01 05:51:32.013 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:32.025 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:32.036 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:32.044 if tag == 'replace':
2025-07-01 05:51:32.056 atags += '^' * la
2025-07-01 05:51:32.062 btags += '^' * lb
2025-07-01 05:51:32.068 elif tag == 'delete':
2025-07-01 05:51:32.075 atags += '-' * la
2025-07-01 05:51:32.083 elif tag == 'insert':
2025-07-01 05:51:32.095 btags += '+' * lb
2025-07-01 05:51:32.104 elif tag == 'equal':
2025-07-01 05:51:32.115 atags += ' ' * la
2025-07-01 05:51:32.124 btags += ' ' * lb
2025-07-01 05:51:32.132 else:
2025-07-01 05:51:32.140 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:32.147 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:32.153 else:
2025-07-01 05:51:32.159 # the synch pair is identical
2025-07-01 05:51:32.165 yield ' ' + aelt
2025-07-01 05:51:32.170
2025-07-01 05:51:32.179 # pump out diffs from after the synch point
2025-07-01 05:51:32.189 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:32.197
2025-07-01 05:51:32.204 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:32.210 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:32.218
2025-07-01 05:51:32.224 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:32.232 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:32.239 alo = 513, ahi = 1101
2025-07-01 05:51:32.246 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:32.257 blo = 513, bhi = 1101
2025-07-01 05:51:32.265
2025-07-01 05:51:32.271 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:32.277 g = []
2025-07-01 05:51:32.283 if alo < ahi:
2025-07-01 05:51:32.294 if blo < bhi:
2025-07-01 05:51:32.302 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:32.312 else:
2025-07-01 05:51:32.322 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:32.330 elif blo < bhi:
2025-07-01 05:51:32.340 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:32.348
2025-07-01 05:51:32.355 > yield from g
2025-07-01 05:51:32.362
2025-07-01 05:51:32.372 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:32.385 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:32.398
2025-07-01 05:51:32.409 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:32.415 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:32.422 alo = 513, ahi = 1101
2025-07-01 05:51:32.428 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:32.433 blo = 513, bhi = 1101
2025-07-01 05:51:32.438
2025-07-01 05:51:32.443 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:32.450 r"""
2025-07-01 05:51:32.460 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:32.467 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:32.475 synch point, and intraline difference marking is done on the
2025-07-01 05:51:32.482 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:32.491
2025-07-01 05:51:32.504 Example:
2025-07-01 05:51:32.515
2025-07-01 05:51:32.528 >>> d = Differ()
2025-07-01 05:51:32.542 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:32.550 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:32.559 >>> print(''.join(results), end="")
2025-07-01 05:51:32.568 - abcDefghiJkl
2025-07-01 05:51:32.591 + abcdefGhijkl
2025-07-01 05:51:32.606 """
2025-07-01 05:51:32.613
2025-07-01 05:51:32.618 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:32.622 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:32.627 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:32.631 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:32.636 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:32.644
2025-07-01 05:51:32.650 # search for the pair that matches best without being identical
2025-07-01 05:51:32.656 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:32.661 # on junk -- unless we have to)
2025-07-01 05:51:32.667 for j in range(blo, bhi):
2025-07-01 05:51:32.673 bj = b[j]
2025-07-01 05:51:32.685 cruncher.set_seq2(bj)
2025-07-01 05:51:32.695 for i in range(alo, ahi):
2025-07-01 05:51:32.704 ai = a[i]
2025-07-01 05:51:32.712 if ai == bj:
2025-07-01 05:51:32.721 if eqi is None:
2025-07-01 05:51:32.730 eqi, eqj = i, j
2025-07-01 05:51:32.737 continue
2025-07-01 05:51:32.743 cruncher.set_seq1(ai)
2025-07-01 05:51:32.751 # computing similarity is expensive, so use the quick
2025-07-01 05:51:32.760 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:32.767 # compares by a factor of 3.
2025-07-01 05:51:32.774 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:32.782 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:32.787 # of the computation is cached by cruncher
2025-07-01 05:51:32.800 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:32.812 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:32.820 cruncher.ratio() > best_ratio:
2025-07-01 05:51:32.828 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:32.835 if best_ratio < cutoff:
2025-07-01 05:51:32.843 # no non-identical "pretty close" pair
2025-07-01 05:51:32.851 if eqi is None:
2025-07-01 05:51:32.859 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:32.868 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:32.876 return
2025-07-01 05:51:32.887 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:32.895 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:32.901 else:
2025-07-01 05:51:32.906 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:32.912 eqi = None
2025-07-01 05:51:32.918
2025-07-01 05:51:32.926 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:32.932 # identical
2025-07-01 05:51:32.938
2025-07-01 05:51:32.944 # pump out diffs from before the synch point
2025-07-01 05:51:32.951 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:32.958
2025-07-01 05:51:32.968 # do intraline marking on the synch pair
2025-07-01 05:51:32.979 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:32.990 if eqi is None:
2025-07-01 05:51:32.999 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:33.006 atags = btags = ""
2025-07-01 05:51:33.013 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:33.025 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:33.036 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:33.043 if tag == 'replace':
2025-07-01 05:51:33.049 atags += '^' * la
2025-07-01 05:51:33.055 btags += '^' * lb
2025-07-01 05:51:33.060 elif tag == 'delete':
2025-07-01 05:51:33.066 atags += '-' * la
2025-07-01 05:51:33.071 elif tag == 'insert':
2025-07-01 05:51:33.080 btags += '+' * lb
2025-07-01 05:51:33.088 elif tag == 'equal':
2025-07-01 05:51:33.095 atags += ' ' * la
2025-07-01 05:51:33.102 btags += ' ' * lb
2025-07-01 05:51:33.109 else:
2025-07-01 05:51:33.116 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:33.124 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:33.131 else:
2025-07-01 05:51:33.138 # the synch pair is identical
2025-07-01 05:51:33.148 yield ' ' + aelt
2025-07-01 05:51:33.160
2025-07-01 05:51:33.169 # pump out diffs from after the synch point
2025-07-01 05:51:33.177 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:33.182
2025-07-01 05:51:33.187 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:33.199 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:33.207
2025-07-01 05:51:33.216 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:33.225 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:33.231 alo = 514, ahi = 1101
2025-07-01 05:51:33.243 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:33.254 blo = 514, bhi = 1101
2025-07-01 05:51:33.265
2025-07-01 05:51:33.277 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:33.287 g = []
2025-07-01 05:51:33.294 if alo < ahi:
2025-07-01 05:51:33.300 if blo < bhi:
2025-07-01 05:51:33.309 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:33.320 else:
2025-07-01 05:51:33.328 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:33.334 elif blo < bhi:
2025-07-01 05:51:33.340 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:33.347
2025-07-01 05:51:33.353 > yield from g
2025-07-01 05:51:33.360
2025-07-01 05:51:33.365 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:33.372 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:33.378
2025-07-01 05:51:33.384 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:33.397 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:33.407 alo = 514, ahi = 1101
2025-07-01 05:51:33.418 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:33.431 blo = 514, bhi = 1101
2025-07-01 05:51:33.441
2025-07-01 05:51:33.453 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:33.464 r"""
2025-07-01 05:51:33.477 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:33.488 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:33.497 synch point, and intraline difference marking is done on the
2025-07-01 05:51:33.505 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:33.511
2025-07-01 05:51:33.517 Example:
2025-07-01 05:51:33.523
2025-07-01 05:51:33.531 >>> d = Differ()
2025-07-01 05:51:33.537 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:33.546 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:33.552 >>> print(''.join(results), end="")
2025-07-01 05:51:33.559 - abcDefghiJkl
2025-07-01 05:51:33.572 + abcdefGhijkl
2025-07-01 05:51:33.585 """
2025-07-01 05:51:33.596
2025-07-01 05:51:33.605 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:33.611 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:33.618 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:33.624 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:33.630 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:33.636
2025-07-01 05:51:33.650 # search for the pair that matches best without being identical
2025-07-01 05:51:33.660 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:33.667 # on junk -- unless we have to)
2025-07-01 05:51:33.674 for j in range(blo, bhi):
2025-07-01 05:51:33.682 bj = b[j]
2025-07-01 05:51:33.693 cruncher.set_seq2(bj)
2025-07-01 05:51:33.704 for i in range(alo, ahi):
2025-07-01 05:51:33.713 ai = a[i]
2025-07-01 05:51:33.720 if ai == bj:
2025-07-01 05:51:33.726 if eqi is None:
2025-07-01 05:51:33.732 eqi, eqj = i, j
2025-07-01 05:51:33.738 continue
2025-07-01 05:51:33.744 cruncher.set_seq1(ai)
2025-07-01 05:51:33.751 # computing similarity is expensive, so use the quick
2025-07-01 05:51:33.759 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:33.773 # compares by a factor of 3.
2025-07-01 05:51:33.781 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:33.788 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:33.801 # of the computation is cached by cruncher
2025-07-01 05:51:33.809 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:33.818 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:33.825 cruncher.ratio() > best_ratio:
2025-07-01 05:51:33.836 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:33.845 if best_ratio < cutoff:
2025-07-01 05:51:33.858 # no non-identical "pretty close" pair
2025-07-01 05:51:33.867 if eqi is None:
2025-07-01 05:51:33.875 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:33.882 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:33.888 return
2025-07-01 05:51:33.895 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:33.901 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:33.906 else:
2025-07-01 05:51:33.911 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:33.915 eqi = None
2025-07-01 05:51:33.920
2025-07-01 05:51:33.925 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:33.930 # identical
2025-07-01 05:51:33.935
2025-07-01 05:51:33.940 # pump out diffs from before the synch point
2025-07-01 05:51:33.945 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:33.950
2025-07-01 05:51:33.956 # do intraline marking on the synch pair
2025-07-01 05:51:33.961 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:33.966 if eqi is None:
2025-07-01 05:51:33.970 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:33.975 atags = btags = ""
2025-07-01 05:51:33.979 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:33.984 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:33.989 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:33.994 if tag == 'replace':
2025-07-01 05:51:33.999 atags += '^' * la
2025-07-01 05:51:34.010 btags += '^' * lb
2025-07-01 05:51:34.019 elif tag == 'delete':
2025-07-01 05:51:34.025 atags += '-' * la
2025-07-01 05:51:34.031 elif tag == 'insert':
2025-07-01 05:51:34.039 btags += '+' * lb
2025-07-01 05:51:34.053 elif tag == 'equal':
2025-07-01 05:51:34.065 atags += ' ' * la
2025-07-01 05:51:34.074 btags += ' ' * lb
2025-07-01 05:51:34.080 else:
2025-07-01 05:51:34.085 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:34.091 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:34.102 else:
2025-07-01 05:51:34.109 # the synch pair is identical
2025-07-01 05:51:34.115 yield ' ' + aelt
2025-07-01 05:51:34.120
2025-07-01 05:51:34.125 # pump out diffs from after the synch point
2025-07-01 05:51:34.131 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:34.136
2025-07-01 05:51:34.142 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:34.153 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:34.165
2025-07-01 05:51:34.174 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:34.183 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:34.188 alo = 515, ahi = 1101
2025-07-01 05:51:34.194 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:34.199 blo = 515, bhi = 1101
2025-07-01 05:51:34.209
2025-07-01 05:51:34.215 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:34.222 g = []
2025-07-01 05:51:34.229 if alo < ahi:
2025-07-01 05:51:34.235 if blo < bhi:
2025-07-01 05:51:34.242 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:34.249 else:
2025-07-01 05:51:34.256 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:34.263 elif blo < bhi:
2025-07-01 05:51:34.271 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:34.283
2025-07-01 05:51:34.293 > yield from g
2025-07-01 05:51:34.302
2025-07-01 05:51:34.308 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:34.315 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:34.321
2025-07-01 05:51:34.327 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:34.336 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:34.343 alo = 515, ahi = 1101
2025-07-01 05:51:34.351 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:34.359 blo = 515, bhi = 1101
2025-07-01 05:51:34.366
2025-07-01 05:51:34.377 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:34.386 r"""
2025-07-01 05:51:34.392 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:34.398 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:34.403 synch point, and intraline difference marking is done on the
2025-07-01 05:51:34.408 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:34.412
2025-07-01 05:51:34.417 Example:
2025-07-01 05:51:34.423
2025-07-01 05:51:34.428 >>> d = Differ()
2025-07-01 05:51:34.436 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:34.443 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:34.451 >>> print(''.join(results), end="")
2025-07-01 05:51:34.461 - abcDefghiJkl
2025-07-01 05:51:34.476 + abcdefGhijkl
2025-07-01 05:51:34.487 """
2025-07-01 05:51:34.493
2025-07-01 05:51:34.499 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:34.507 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:34.517 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:34.525 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:34.531 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:34.538
2025-07-01 05:51:34.543 # search for the pair that matches best without being identical
2025-07-01 05:51:34.555 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:34.562 # on junk -- unless we have to)
2025-07-01 05:51:34.568 for j in range(blo, bhi):
2025-07-01 05:51:34.574 bj = b[j]
2025-07-01 05:51:34.583 cruncher.set_seq2(bj)
2025-07-01 05:51:34.595 for i in range(alo, ahi):
2025-07-01 05:51:34.604 ai = a[i]
2025-07-01 05:51:34.612 if ai == bj:
2025-07-01 05:51:34.619 if eqi is None:
2025-07-01 05:51:34.625 eqi, eqj = i, j
2025-07-01 05:51:34.631 continue
2025-07-01 05:51:34.636 cruncher.set_seq1(ai)
2025-07-01 05:51:34.641 # computing similarity is expensive, so use the quick
2025-07-01 05:51:34.646 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:34.651 # compares by a factor of 3.
2025-07-01 05:51:34.657 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:34.662 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:34.668 # of the computation is cached by cruncher
2025-07-01 05:51:34.674 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:34.679 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:34.684 cruncher.ratio() > best_ratio:
2025-07-01 05:51:34.688 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:34.693 if best_ratio < cutoff:
2025-07-01 05:51:34.699 # no non-identical "pretty close" pair
2025-07-01 05:51:34.704 if eqi is None:
2025-07-01 05:51:34.713 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:34.725 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:34.733 return
2025-07-01 05:51:34.741 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:34.748 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:34.753 else:
2025-07-01 05:51:34.759 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:34.766 eqi = None
2025-07-01 05:51:34.773
2025-07-01 05:51:34.781 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:34.787 # identical
2025-07-01 05:51:34.795
2025-07-01 05:51:34.806 # pump out diffs from before the synch point
2025-07-01 05:51:34.817 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:34.827
2025-07-01 05:51:34.834 # do intraline marking on the synch pair
2025-07-01 05:51:34.841 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:34.848 if eqi is None:
2025-07-01 05:51:34.855 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:34.864 atags = btags = ""
2025-07-01 05:51:34.874 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:34.887 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:34.896 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:34.903 if tag == 'replace':
2025-07-01 05:51:34.910 atags += '^' * la
2025-07-01 05:51:34.915 btags += '^' * lb
2025-07-01 05:51:34.921 elif tag == 'delete':
2025-07-01 05:51:34.927 atags += '-' * la
2025-07-01 05:51:34.934 elif tag == 'insert':
2025-07-01 05:51:34.941 btags += '+' * lb
2025-07-01 05:51:34.948 elif tag == 'equal':
2025-07-01 05:51:34.954 atags += ' ' * la
2025-07-01 05:51:34.959 btags += ' ' * lb
2025-07-01 05:51:34.965 else:
2025-07-01 05:51:34.971 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:34.977 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:34.983 else:
2025-07-01 05:51:34.990 # the synch pair is identical
2025-07-01 05:51:34.997 yield ' ' + aelt
2025-07-01 05:51:35.004
2025-07-01 05:51:35.011 # pump out diffs from after the synch point
2025-07-01 05:51:35.019 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:35.025
2025-07-01 05:51:35.033 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:35.039 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:35.044
2025-07-01 05:51:35.050 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:35.056 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:35.063 alo = 516, ahi = 1101
2025-07-01 05:51:35.071 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:35.082 blo = 516, bhi = 1101
2025-07-01 05:51:35.090
2025-07-01 05:51:35.096 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:35.102 g = []
2025-07-01 05:51:35.109 if alo < ahi:
2025-07-01 05:51:35.115 if blo < bhi:
2025-07-01 05:51:35.121 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:35.126 else:
2025-07-01 05:51:35.132 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:35.138 elif blo < bhi:
2025-07-01 05:51:35.144 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:35.149
2025-07-01 05:51:35.155 > yield from g
2025-07-01 05:51:35.161
2025-07-01 05:51:35.167 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:35.174 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:35.180
2025-07-01 05:51:35.187 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:35.196 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:35.203 alo = 516, ahi = 1101
2025-07-01 05:51:35.211 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:35.218 blo = 516, bhi = 1101
2025-07-01 05:51:35.227
2025-07-01 05:51:35.237 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:35.246 r"""
2025-07-01 05:51:35.252 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:35.258 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:35.264 synch point, and intraline difference marking is done on the
2025-07-01 05:51:35.270 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:35.276
2025-07-01 05:51:35.282 Example:
2025-07-01 05:51:35.288
2025-07-01 05:51:35.294 >>> d = Differ()
2025-07-01 05:51:35.303 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:35.312 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:35.317 >>> print(''.join(results), end="")
2025-07-01 05:51:35.323 - abcDefghiJkl
2025-07-01 05:51:35.334 + abcdefGhijkl
2025-07-01 05:51:35.345 """
2025-07-01 05:51:35.351
2025-07-01 05:51:35.359 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:35.373 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:35.384 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:35.393 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:35.400 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:35.406
2025-07-01 05:51:35.412 # search for the pair that matches best without being identical
2025-07-01 05:51:35.418 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:35.424 # on junk -- unless we have to)
2025-07-01 05:51:35.430 for j in range(blo, bhi):
2025-07-01 05:51:35.436 bj = b[j]
2025-07-01 05:51:35.441 cruncher.set_seq2(bj)
2025-07-01 05:51:35.446 for i in range(alo, ahi):
2025-07-01 05:51:35.451 ai = a[i]
2025-07-01 05:51:35.457 if ai == bj:
2025-07-01 05:51:35.463 if eqi is None:
2025-07-01 05:51:35.469 eqi, eqj = i, j
2025-07-01 05:51:35.475 continue
2025-07-01 05:51:35.481 cruncher.set_seq1(ai)
2025-07-01 05:51:35.488 # computing similarity is expensive, so use the quick
2025-07-01 05:51:35.493 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:35.499 # compares by a factor of 3.
2025-07-01 05:51:35.505 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:35.512 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:35.519 # of the computation is cached by cruncher
2025-07-01 05:51:35.527 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:35.534 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:35.541 cruncher.ratio() > best_ratio:
2025-07-01 05:51:35.549 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:35.557 if best_ratio < cutoff:
2025-07-01 05:51:35.563 # no non-identical "pretty close" pair
2025-07-01 05:51:35.570 if eqi is None:
2025-07-01 05:51:35.579 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:35.590 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:35.597 return
2025-07-01 05:51:35.603 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:35.609 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:35.615 else:
2025-07-01 05:51:35.621 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:35.627 eqi = None
2025-07-01 05:51:35.634
2025-07-01 05:51:35.644 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:35.653 # identical
2025-07-01 05:51:35.661
2025-07-01 05:51:35.667 # pump out diffs from before the synch point
2025-07-01 05:51:35.673 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:35.678
2025-07-01 05:51:35.684 # do intraline marking on the synch pair
2025-07-01 05:51:35.690 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:35.695 if eqi is None:
2025-07-01 05:51:35.700 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:35.706 atags = btags = ""
2025-07-01 05:51:35.711 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:35.717 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:35.723 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:35.728 if tag == 'replace':
2025-07-01 05:51:35.734 atags += '^' * la
2025-07-01 05:51:35.743 btags += '^' * lb
2025-07-01 05:51:35.752 elif tag == 'delete':
2025-07-01 05:51:35.759 atags += '-' * la
2025-07-01 05:51:35.765 elif tag == 'insert':
2025-07-01 05:51:35.770 btags += '+' * lb
2025-07-01 05:51:35.775 elif tag == 'equal':
2025-07-01 05:51:35.780 atags += ' ' * la
2025-07-01 05:51:35.785 btags += ' ' * lb
2025-07-01 05:51:35.791 else:
2025-07-01 05:51:35.797 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:35.803 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:35.808 else:
2025-07-01 05:51:35.814 # the synch pair is identical
2025-07-01 05:51:35.819 yield ' ' + aelt
2025-07-01 05:51:35.825
2025-07-01 05:51:35.838 # pump out diffs from after the synch point
2025-07-01 05:51:35.849 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:35.857
2025-07-01 05:51:35.864 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:35.870 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:35.875
2025-07-01 05:51:35.880 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:35.886 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:35.891 alo = 517, ahi = 1101
2025-07-01 05:51:35.898 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:35.905 blo = 517, bhi = 1101
2025-07-01 05:51:35.911
2025-07-01 05:51:35.917 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:35.922 g = []
2025-07-01 05:51:35.927 if alo < ahi:
2025-07-01 05:51:35.932 if blo < bhi:
2025-07-01 05:51:35.937 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:35.942 else:
2025-07-01 05:51:35.947 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:35.953 elif blo < bhi:
2025-07-01 05:51:35.958 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:35.964
2025-07-01 05:51:35.970 > yield from g
2025-07-01 05:51:35.976
2025-07-01 05:51:35.983 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:35.990 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:35.996
2025-07-01 05:51:36.003 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:36.011 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:36.018 alo = 517, ahi = 1101
2025-07-01 05:51:36.026 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:36.033 blo = 517, bhi = 1101
2025-07-01 05:51:36.039
2025-07-01 05:51:36.046 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:36.052 r"""
2025-07-01 05:51:36.057 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:36.063 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:36.067 synch point, and intraline difference marking is done on the
2025-07-01 05:51:36.072 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:36.076
2025-07-01 05:51:36.081 Example:
2025-07-01 05:51:36.087
2025-07-01 05:51:36.092 >>> d = Differ()
2025-07-01 05:51:36.098 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:36.105 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:36.112 >>> print(''.join(results), end="")
2025-07-01 05:51:36.120 - abcDefghiJkl
2025-07-01 05:51:36.131 + abcdefGhijkl
2025-07-01 05:51:36.140 """
2025-07-01 05:51:36.145
2025-07-01 05:51:36.151 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:36.157 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:36.163 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:36.170 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:36.176 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:36.182
2025-07-01 05:51:36.188 # search for the pair that matches best without being identical
2025-07-01 05:51:36.194 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:36.200 # on junk -- unless we have to)
2025-07-01 05:51:36.207 for j in range(blo, bhi):
2025-07-01 05:51:36.214 bj = b[j]
2025-07-01 05:51:36.219 cruncher.set_seq2(bj)
2025-07-01 05:51:36.225 for i in range(alo, ahi):
2025-07-01 05:51:36.230 ai = a[i]
2025-07-01 05:51:36.236 if ai == bj:
2025-07-01 05:51:36.242 if eqi is None:
2025-07-01 05:51:36.248 eqi, eqj = i, j
2025-07-01 05:51:36.254 continue
2025-07-01 05:51:36.260 cruncher.set_seq1(ai)
2025-07-01 05:51:36.265 # computing similarity is expensive, so use the quick
2025-07-01 05:51:36.271 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:36.277 # compares by a factor of 3.
2025-07-01 05:51:36.283 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:36.289 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:36.295 # of the computation is cached by cruncher
2025-07-01 05:51:36.300 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:36.306 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:36.312 cruncher.ratio() > best_ratio:
2025-07-01 05:51:36.318 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:36.324 if best_ratio < cutoff:
2025-07-01 05:51:36.331 # no non-identical "pretty close" pair
2025-07-01 05:51:36.337 if eqi is None:
2025-07-01 05:51:36.344 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:36.352 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:36.358 return
2025-07-01 05:51:36.364 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:36.369 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:36.375 else:
2025-07-01 05:51:36.381 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:36.387 eqi = None
2025-07-01 05:51:36.393
2025-07-01 05:51:36.399 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:36.405 # identical
2025-07-01 05:51:36.411
2025-07-01 05:51:36.417 # pump out diffs from before the synch point
2025-07-01 05:51:36.423 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:36.429
2025-07-01 05:51:36.434 # do intraline marking on the synch pair
2025-07-01 05:51:36.440 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:36.446 if eqi is None:
2025-07-01 05:51:36.452 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:36.458 atags = btags = ""
2025-07-01 05:51:36.464 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:36.470 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:36.475 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:36.481 if tag == 'replace':
2025-07-01 05:51:36.486 atags += '^' * la
2025-07-01 05:51:36.492 btags += '^' * lb
2025-07-01 05:51:36.498 elif tag == 'delete':
2025-07-01 05:51:36.503 atags += '-' * la
2025-07-01 05:51:36.509 elif tag == 'insert':
2025-07-01 05:51:36.515 btags += '+' * lb
2025-07-01 05:51:36.521 elif tag == 'equal':
2025-07-01 05:51:36.526 atags += ' ' * la
2025-07-01 05:51:36.532 btags += ' ' * lb
2025-07-01 05:51:36.539 else:
2025-07-01 05:51:36.549 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:36.555 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:36.561 else:
2025-07-01 05:51:36.567 # the synch pair is identical
2025-07-01 05:51:36.573 yield ' ' + aelt
2025-07-01 05:51:36.581
2025-07-01 05:51:36.588 # pump out diffs from after the synch point
2025-07-01 05:51:36.595 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:36.602
2025-07-01 05:51:36.608 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:36.617 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:36.623
2025-07-01 05:51:36.629 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:36.635 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:36.642 alo = 518, ahi = 1101
2025-07-01 05:51:36.654 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:36.663 blo = 518, bhi = 1101
2025-07-01 05:51:36.670
2025-07-01 05:51:36.677 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:36.682 g = []
2025-07-01 05:51:36.688 if alo < ahi:
2025-07-01 05:51:36.694 if blo < bhi:
2025-07-01 05:51:36.702 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:36.708 else:
2025-07-01 05:51:36.719 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:36.727 elif blo < bhi:
2025-07-01 05:51:36.735 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:36.742
2025-07-01 05:51:36.748 > yield from g
2025-07-01 05:51:36.755
2025-07-01 05:51:36.760 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:36.765 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:36.770
2025-07-01 05:51:36.777 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:36.783 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:36.789 alo = 518, ahi = 1101
2025-07-01 05:51:36.796 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:36.801 blo = 518, bhi = 1101
2025-07-01 05:51:36.807
2025-07-01 05:51:36.812 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:36.818 r"""
2025-07-01 05:51:36.824 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:36.830 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:36.836 synch point, and intraline difference marking is done on the
2025-07-01 05:51:36.843 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:36.851
2025-07-01 05:51:36.863 Example:
2025-07-01 05:51:36.872
2025-07-01 05:51:36.878 >>> d = Differ()
2025-07-01 05:51:36.884 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:36.889 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:36.897 >>> print(''.join(results), end="")
2025-07-01 05:51:36.903 - abcDefghiJkl
2025-07-01 05:51:36.917 + abcdefGhijkl
2025-07-01 05:51:36.927 """
2025-07-01 05:51:36.933
2025-07-01 05:51:36.938 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:36.944 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:36.950 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:36.955 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:36.960 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:36.965
2025-07-01 05:51:36.970 # search for the pair that matches best without being identical
2025-07-01 05:51:36.975 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:36.980 # on junk -- unless we have to)
2025-07-01 05:51:36.986 for j in range(blo, bhi):
2025-07-01 05:51:36.992 bj = b[j]
2025-07-01 05:51:36.998 cruncher.set_seq2(bj)
2025-07-01 05:51:37.004 for i in range(alo, ahi):
2025-07-01 05:51:37.011 ai = a[i]
2025-07-01 05:51:37.018 if ai == bj:
2025-07-01 05:51:37.029 if eqi is None:
2025-07-01 05:51:37.039 eqi, eqj = i, j
2025-07-01 05:51:37.047 continue
2025-07-01 05:51:37.053 cruncher.set_seq1(ai)
2025-07-01 05:51:37.058 # computing similarity is expensive, so use the quick
2025-07-01 05:51:37.063 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:37.068 # compares by a factor of 3.
2025-07-01 05:51:37.073 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:37.078 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:37.082 # of the computation is cached by cruncher
2025-07-01 05:51:37.087 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:37.092 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:37.097 cruncher.ratio() > best_ratio:
2025-07-01 05:51:37.102 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:37.107 if best_ratio < cutoff:
2025-07-01 05:51:37.113 # no non-identical "pretty close" pair
2025-07-01 05:51:37.119 if eqi is None:
2025-07-01 05:51:37.126 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:37.133 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:37.140 return
2025-07-01 05:51:37.147 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:37.154 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:37.161 else:
2025-07-01 05:51:37.167 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:37.174 eqi = None
2025-07-01 05:51:37.181
2025-07-01 05:51:37.188 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:37.198 # identical
2025-07-01 05:51:37.205
2025-07-01 05:51:37.211 # pump out diffs from before the synch point
2025-07-01 05:51:37.216 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:37.222
2025-07-01 05:51:37.228 # do intraline marking on the synch pair
2025-07-01 05:51:37.237 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:37.248 if eqi is None:
2025-07-01 05:51:37.257 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:37.263 atags = btags = ""
2025-07-01 05:51:37.270 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:37.275 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:37.279 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:37.284 if tag == 'replace':
2025-07-01 05:51:37.288 atags += '^' * la
2025-07-01 05:51:37.293 btags += '^' * lb
2025-07-01 05:51:37.299 elif tag == 'delete':
2025-07-01 05:51:37.304 atags += '-' * la
2025-07-01 05:51:37.310 elif tag == 'insert':
2025-07-01 05:51:37.316 btags += '+' * lb
2025-07-01 05:51:37.323 elif tag == 'equal':
2025-07-01 05:51:37.333 atags += ' ' * la
2025-07-01 05:51:37.344 btags += ' ' * lb
2025-07-01 05:51:37.352 else:
2025-07-01 05:51:37.365 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:37.376 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:37.387 else:
2025-07-01 05:51:37.400 # the synch pair is identical
2025-07-01 05:51:37.409 yield ' ' + aelt
2025-07-01 05:51:37.417
2025-07-01 05:51:37.428 # pump out diffs from after the synch point
2025-07-01 05:51:37.438 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:37.448
2025-07-01 05:51:37.456 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:37.464 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:37.476
2025-07-01 05:51:37.489 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:37.500 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:37.512 alo = 519, ahi = 1101
2025-07-01 05:51:37.525 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:37.535 blo = 519, bhi = 1101
2025-07-01 05:51:37.544
2025-07-01 05:51:37.552 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:37.558 g = []
2025-07-01 05:51:37.569 if alo < ahi:
2025-07-01 05:51:37.578 if blo < bhi:
2025-07-01 05:51:37.589 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:37.600 else:
2025-07-01 05:51:37.610 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:37.621 elif blo < bhi:
2025-07-01 05:51:37.631 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:37.641
2025-07-01 05:51:37.652 > yield from g
2025-07-01 05:51:37.659
2025-07-01 05:51:37.667 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:37.679 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:37.691
2025-07-01 05:51:37.701 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:37.710 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:37.721 alo = 519, ahi = 1101
2025-07-01 05:51:37.731 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:37.738 blo = 519, bhi = 1101
2025-07-01 05:51:37.748
2025-07-01 05:51:37.758 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:37.767 r"""
2025-07-01 05:51:37.778 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:37.788 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:37.800 synch point, and intraline difference marking is done on the
2025-07-01 05:51:37.811 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:37.823
2025-07-01 05:51:37.836 Example:
2025-07-01 05:51:37.845
2025-07-01 05:51:37.853 >>> d = Differ()
2025-07-01 05:51:37.860 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:37.867 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:37.873 >>> print(''.join(results), end="")
2025-07-01 05:51:37.878 - abcDefghiJkl
2025-07-01 05:51:37.899 + abcdefGhijkl
2025-07-01 05:51:37.916 """
2025-07-01 05:51:37.925
2025-07-01 05:51:37.937 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:37.949 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:37.963 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:37.975 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:37.987 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:37.997
2025-07-01 05:51:38.007 # search for the pair that matches best without being identical
2025-07-01 05:51:38.018 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:38.027 # on junk -- unless we have to)
2025-07-01 05:51:38.034 for j in range(blo, bhi):
2025-07-01 05:51:38.044 bj = b[j]
2025-07-01 05:51:38.052 cruncher.set_seq2(bj)
2025-07-01 05:51:38.059 for i in range(alo, ahi):
2025-07-01 05:51:38.065 ai = a[i]
2025-07-01 05:51:38.071 if ai == bj:
2025-07-01 05:51:38.077 if eqi is None:
2025-07-01 05:51:38.083 eqi, eqj = i, j
2025-07-01 05:51:38.089 continue
2025-07-01 05:51:38.094 cruncher.set_seq1(ai)
2025-07-01 05:51:38.107 # computing similarity is expensive, so use the quick
2025-07-01 05:51:38.119 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:38.129 # compares by a factor of 3.
2025-07-01 05:51:38.137 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:38.145 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:38.151 # of the computation is cached by cruncher
2025-07-01 05:51:38.157 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:38.162 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:38.167 cruncher.ratio() > best_ratio:
2025-07-01 05:51:38.171 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:38.175 if best_ratio < cutoff:
2025-07-01 05:51:38.180 # no non-identical "pretty close" pair
2025-07-01 05:51:38.187 if eqi is None:
2025-07-01 05:51:38.197 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:38.207 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:38.216 return
2025-07-01 05:51:38.230 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:38.241 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:38.252 else:
2025-07-01 05:51:38.264 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:38.275 eqi = None
2025-07-01 05:51:38.283
2025-07-01 05:51:38.290 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:38.297 # identical
2025-07-01 05:51:38.302
2025-07-01 05:51:38.308 # pump out diffs from before the synch point
2025-07-01 05:51:38.314 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:38.325
2025-07-01 05:51:38.335 # do intraline marking on the synch pair
2025-07-01 05:51:38.343 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:38.352 if eqi is None:
2025-07-01 05:51:38.364 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:38.372 atags = btags = ""
2025-07-01 05:51:38.379 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:38.387 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:38.397 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:38.405 if tag == 'replace':
2025-07-01 05:51:38.417 atags += '^' * la
2025-07-01 05:51:38.429 btags += '^' * lb
2025-07-01 05:51:38.437 elif tag == 'delete':
2025-07-01 05:51:38.449 atags += '-' * la
2025-07-01 05:51:38.459 elif tag == 'insert':
2025-07-01 05:51:38.472 btags += '+' * lb
2025-07-01 05:51:38.481 elif tag == 'equal':
2025-07-01 05:51:38.487 atags += ' ' * la
2025-07-01 05:51:38.500 btags += ' ' * lb
2025-07-01 05:51:38.509 else:
2025-07-01 05:51:38.520 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:38.529 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:38.538 else:
2025-07-01 05:51:38.544 # the synch pair is identical
2025-07-01 05:51:38.550 yield ' ' + aelt
2025-07-01 05:51:38.559
2025-07-01 05:51:38.567 # pump out diffs from after the synch point
2025-07-01 05:51:38.577 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:38.586
2025-07-01 05:51:38.595 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:38.604 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:38.614
2025-07-01 05:51:38.622 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:38.628 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:38.634 alo = 520, ahi = 1101
2025-07-01 05:51:38.639 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:38.644 blo = 520, bhi = 1101
2025-07-01 05:51:38.650
2025-07-01 05:51:38.656 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:38.663 g = []
2025-07-01 05:51:38.674 if alo < ahi:
2025-07-01 05:51:38.682 if blo < bhi:
2025-07-01 05:51:38.693 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:38.701 else:
2025-07-01 05:51:38.710 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:38.721 elif blo < bhi:
2025-07-01 05:51:38.729 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:38.736
2025-07-01 05:51:38.743 > yield from g
2025-07-01 05:51:38.751
2025-07-01 05:51:38.757 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:38.763 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:38.768
2025-07-01 05:51:38.775 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:38.781 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:38.787 alo = 520, ahi = 1101
2025-07-01 05:51:38.794 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:38.799 blo = 520, bhi = 1101
2025-07-01 05:51:38.804
2025-07-01 05:51:38.808 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:38.813 r"""
2025-07-01 05:51:38.818 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:38.822 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:38.827 synch point, and intraline difference marking is done on the
2025-07-01 05:51:38.833 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:38.838
2025-07-01 05:51:38.848 Example:
2025-07-01 05:51:38.859
2025-07-01 05:51:38.867 >>> d = Differ()
2025-07-01 05:51:38.874 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:38.881 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:38.887 >>> print(''.join(results), end="")
2025-07-01 05:51:38.892 - abcDefghiJkl
2025-07-01 05:51:38.903 + abcdefGhijkl
2025-07-01 05:51:38.921 """
2025-07-01 05:51:38.928
2025-07-01 05:51:38.935 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:38.944 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:38.956 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:38.966 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:38.980 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:38.988
2025-07-01 05:51:38.997 # search for the pair that matches best without being identical
2025-07-01 05:51:39.007 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:39.018 # on junk -- unless we have to)
2025-07-01 05:51:39.032 for j in range(blo, bhi):
2025-07-01 05:51:39.044 bj = b[j]
2025-07-01 05:51:39.053 cruncher.set_seq2(bj)
2025-07-01 05:51:39.060 for i in range(alo, ahi):
2025-07-01 05:51:39.072 ai = a[i]
2025-07-01 05:51:39.083 if ai == bj:
2025-07-01 05:51:39.091 if eqi is None:
2025-07-01 05:51:39.099 eqi, eqj = i, j
2025-07-01 05:51:39.112 continue
2025-07-01 05:51:39.124 cruncher.set_seq1(ai)
2025-07-01 05:51:39.133 # computing similarity is expensive, so use the quick
2025-07-01 05:51:39.141 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:39.148 # compares by a factor of 3.
2025-07-01 05:51:39.155 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:39.168 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:39.177 # of the computation is cached by cruncher
2025-07-01 05:51:39.189 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:39.199 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:39.208 cruncher.ratio() > best_ratio:
2025-07-01 05:51:39.220 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:39.228 if best_ratio < cutoff:
2025-07-01 05:51:39.241 # no non-identical "pretty close" pair
2025-07-01 05:51:39.254 if eqi is None:
2025-07-01 05:51:39.262 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:39.273 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:39.283 return
2025-07-01 05:51:39.291 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:39.298 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:39.309 else:
2025-07-01 05:51:39.322 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:39.331 eqi = None
2025-07-01 05:51:39.340
2025-07-01 05:51:39.350 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:39.362 # identical
2025-07-01 05:51:39.373
2025-07-01 05:51:39.383 # pump out diffs from before the synch point
2025-07-01 05:51:39.394 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:39.404
2025-07-01 05:51:39.415 # do intraline marking on the synch pair
2025-07-01 05:51:39.423 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:39.432 if eqi is None:
2025-07-01 05:51:39.443 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:39.452 atags = btags = ""
2025-07-01 05:51:39.459 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:39.465 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:39.475 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:39.484 if tag == 'replace':
2025-07-01 05:51:39.492 atags += '^' * la
2025-07-01 05:51:39.499 btags += '^' * lb
2025-07-01 05:51:39.506 elif tag == 'delete':
2025-07-01 05:51:39.516 atags += '-' * la
2025-07-01 05:51:39.525 elif tag == 'insert':
2025-07-01 05:51:39.531 btags += '+' * lb
2025-07-01 05:51:39.537 elif tag == 'equal':
2025-07-01 05:51:39.542 atags += ' ' * la
2025-07-01 05:51:39.546 btags += ' ' * lb
2025-07-01 05:51:39.551 else:
2025-07-01 05:51:39.556 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:39.565 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:39.578 else:
2025-07-01 05:51:39.587 # the synch pair is identical
2025-07-01 05:51:39.594 yield ' ' + aelt
2025-07-01 05:51:39.600
2025-07-01 05:51:39.606 # pump out diffs from after the synch point
2025-07-01 05:51:39.611 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:39.615
2025-07-01 05:51:39.620 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:39.624 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:39.629
2025-07-01 05:51:39.634 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:39.641 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:39.647 alo = 521, ahi = 1101
2025-07-01 05:51:39.654 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:39.659 blo = 521, bhi = 1101
2025-07-01 05:51:39.665
2025-07-01 05:51:39.670 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:39.676 g = []
2025-07-01 05:51:39.681 if alo < ahi:
2025-07-01 05:51:39.687 if blo < bhi:
2025-07-01 05:51:39.694 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:39.701 else:
2025-07-01 05:51:39.707 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:39.715 elif blo < bhi:
2025-07-01 05:51:39.727 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:39.735
2025-07-01 05:51:39.743 > yield from g
2025-07-01 05:51:39.751
2025-07-01 05:51:39.762 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:39.770 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:39.778
2025-07-01 05:51:39.789 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:39.801 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:39.811 alo = 521, ahi = 1101
2025-07-01 05:51:39.824 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:39.834 blo = 521, bhi = 1101
2025-07-01 05:51:39.848
2025-07-01 05:51:39.862 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:39.875 r"""
2025-07-01 05:51:39.887 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:39.897 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:39.904 synch point, and intraline difference marking is done on the
2025-07-01 05:51:39.915 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:39.928
2025-07-01 05:51:39.938 Example:
2025-07-01 05:51:39.947
2025-07-01 05:51:39.955 >>> d = Differ()
2025-07-01 05:51:39.962 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:39.969 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:39.976 >>> print(''.join(results), end="")
2025-07-01 05:51:39.982 - abcDefghiJkl
2025-07-01 05:51:40.005 + abcdefGhijkl
2025-07-01 05:51:40.029 """
2025-07-01 05:51:40.037
2025-07-01 05:51:40.044 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:40.052 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:40.059 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:40.065 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:40.072 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:40.078
2025-07-01 05:51:40.087 # search for the pair that matches best without being identical
2025-07-01 05:51:40.099 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:40.107 # on junk -- unless we have to)
2025-07-01 05:51:40.114 for j in range(blo, bhi):
2025-07-01 05:51:40.119 bj = b[j]
2025-07-01 05:51:40.124 cruncher.set_seq2(bj)
2025-07-01 05:51:40.130 for i in range(alo, ahi):
2025-07-01 05:51:40.137 ai = a[i]
2025-07-01 05:51:40.144 if ai == bj:
2025-07-01 05:51:40.150 if eqi is None:
2025-07-01 05:51:40.160 eqi, eqj = i, j
2025-07-01 05:51:40.170 continue
2025-07-01 05:51:40.183 cruncher.set_seq1(ai)
2025-07-01 05:51:40.195 # computing similarity is expensive, so use the quick
2025-07-01 05:51:40.205 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:40.213 # compares by a factor of 3.
2025-07-01 05:51:40.220 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:40.226 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:40.233 # of the computation is cached by cruncher
2025-07-01 05:51:40.239 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:40.245 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:40.251 cruncher.ratio() > best_ratio:
2025-07-01 05:51:40.261 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:40.268 if best_ratio < cutoff:
2025-07-01 05:51:40.275 # no non-identical "pretty close" pair
2025-07-01 05:51:40.286 if eqi is None:
2025-07-01 05:51:40.294 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:40.306 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:40.318 return
2025-07-01 05:51:40.327 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:40.335 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:40.341 else:
2025-07-01 05:51:40.348 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:40.355 eqi = None
2025-07-01 05:51:40.361
2025-07-01 05:51:40.370 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:40.379 # identical
2025-07-01 05:51:40.386
2025-07-01 05:51:40.393 # pump out diffs from before the synch point
2025-07-01 05:51:40.400 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:40.407
2025-07-01 05:51:40.414 # do intraline marking on the synch pair
2025-07-01 05:51:40.425 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:40.433 if eqi is None:
2025-07-01 05:51:40.439 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:40.445 atags = btags = ""
2025-07-01 05:51:40.450 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:40.456 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:40.462 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:40.469 if tag == 'replace':
2025-07-01 05:51:40.476 atags += '^' * la
2025-07-01 05:51:40.482 btags += '^' * lb
2025-07-01 05:51:40.489 elif tag == 'delete':
2025-07-01 05:51:40.499 atags += '-' * la
2025-07-01 05:51:40.509 elif tag == 'insert':
2025-07-01 05:51:40.522 btags += '+' * lb
2025-07-01 05:51:40.533 elif tag == 'equal':
2025-07-01 05:51:40.543 atags += ' ' * la
2025-07-01 05:51:40.555 btags += ' ' * lb
2025-07-01 05:51:40.564 else:
2025-07-01 05:51:40.572 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:40.579 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:40.585 else:
2025-07-01 05:51:40.591 # the synch pair is identical
2025-07-01 05:51:40.599 yield ' ' + aelt
2025-07-01 05:51:40.609
2025-07-01 05:51:40.617 # pump out diffs from after the synch point
2025-07-01 05:51:40.624 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:40.633
2025-07-01 05:51:40.640 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:40.648 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:40.655
2025-07-01 05:51:40.661 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:40.669 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:40.674 alo = 522, ahi = 1101
2025-07-01 05:51:40.682 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:40.694 blo = 522, bhi = 1101
2025-07-01 05:51:40.703
2025-07-01 05:51:40.711 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:40.717 g = []
2025-07-01 05:51:40.724 if alo < ahi:
2025-07-01 05:51:40.730 if blo < bhi:
2025-07-01 05:51:40.736 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:40.741 else:
2025-07-01 05:51:40.747 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:40.755 elif blo < bhi:
2025-07-01 05:51:40.764 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:40.772
2025-07-01 05:51:40.779 > yield from g
2025-07-01 05:51:40.788
2025-07-01 05:51:40.796 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:40.804 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:40.810
2025-07-01 05:51:40.816 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:40.828 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:40.836 alo = 522, ahi = 1101
2025-07-01 05:51:40.847 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:40.854 blo = 522, bhi = 1101
2025-07-01 05:51:40.862
2025-07-01 05:51:40.869 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:40.875 r"""
2025-07-01 05:51:40.883 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:40.895 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:40.906 synch point, and intraline difference marking is done on the
2025-07-01 05:51:40.916 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:40.925
2025-07-01 05:51:40.937 Example:
2025-07-01 05:51:40.947
2025-07-01 05:51:40.959 >>> d = Differ()
2025-07-01 05:51:40.969 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:40.980 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:40.991 >>> print(''.join(results), end="")
2025-07-01 05:51:41.000 - abcDefghiJkl
2025-07-01 05:51:41.020 + abcdefGhijkl
2025-07-01 05:51:41.034 """
2025-07-01 05:51:41.040
2025-07-01 05:51:41.047 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:41.053 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:41.059 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:41.065 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:41.071 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:41.076
2025-07-01 05:51:41.082 # search for the pair that matches best without being identical
2025-07-01 05:51:41.088 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:41.094 # on junk -- unless we have to)
2025-07-01 05:51:41.104 for j in range(blo, bhi):
2025-07-01 05:51:41.113 bj = b[j]
2025-07-01 05:51:41.120 cruncher.set_seq2(bj)
2025-07-01 05:51:41.127 for i in range(alo, ahi):
2025-07-01 05:51:41.139 ai = a[i]
2025-07-01 05:51:41.151 if ai == bj:
2025-07-01 05:51:41.161 if eqi is None:
2025-07-01 05:51:41.169 eqi, eqj = i, j
2025-07-01 05:51:41.175 continue
2025-07-01 05:51:41.181 cruncher.set_seq1(ai)
2025-07-01 05:51:41.188 # computing similarity is expensive, so use the quick
2025-07-01 05:51:41.193 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:41.199 # compares by a factor of 3.
2025-07-01 05:51:41.205 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:41.211 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:41.217 # of the computation is cached by cruncher
2025-07-01 05:51:41.222 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:41.228 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:41.237 cruncher.ratio() > best_ratio:
2025-07-01 05:51:41.244 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:41.251 if best_ratio < cutoff:
2025-07-01 05:51:41.258 # no non-identical "pretty close" pair
2025-07-01 05:51:41.263 if eqi is None:
2025-07-01 05:51:41.269 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:41.274 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:41.279 return
2025-07-01 05:51:41.285 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:41.291 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:41.296 else:
2025-07-01 05:51:41.304 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:41.310 eqi = None
2025-07-01 05:51:41.317
2025-07-01 05:51:41.323 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:41.331 # identical
2025-07-01 05:51:41.340
2025-07-01 05:51:41.349 # pump out diffs from before the synch point
2025-07-01 05:51:41.361 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:41.368
2025-07-01 05:51:41.375 # do intraline marking on the synch pair
2025-07-01 05:51:41.382 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:41.387 if eqi is None:
2025-07-01 05:51:41.392 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:41.397 atags = btags = ""
2025-07-01 05:51:41.401 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:41.406 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:41.410 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:41.419 if tag == 'replace':
2025-07-01 05:51:41.430 atags += '^' * la
2025-07-01 05:51:41.439 btags += '^' * lb
2025-07-01 05:51:41.447 elif tag == 'delete':
2025-07-01 05:51:41.454 atags += '-' * la
2025-07-01 05:51:41.459 elif tag == 'insert':
2025-07-01 05:51:41.466 btags += '+' * lb
2025-07-01 05:51:41.471 elif tag == 'equal':
2025-07-01 05:51:41.476 atags += ' ' * la
2025-07-01 05:51:41.482 btags += ' ' * lb
2025-07-01 05:51:41.487 else:
2025-07-01 05:51:41.493 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:41.498 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:41.504 else:
2025-07-01 05:51:41.510 # the synch pair is identical
2025-07-01 05:51:41.520 yield ' ' + aelt
2025-07-01 05:51:41.532
2025-07-01 05:51:41.540 # pump out diffs from after the synch point
2025-07-01 05:51:41.546 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:41.551
2025-07-01 05:51:41.557 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:41.564 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:41.569
2025-07-01 05:51:41.574 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:41.584 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:41.589 alo = 523, ahi = 1101
2025-07-01 05:51:41.595 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:41.600 blo = 523, bhi = 1101
2025-07-01 05:51:41.604
2025-07-01 05:51:41.610 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:41.616 g = []
2025-07-01 05:51:41.621 if alo < ahi:
2025-07-01 05:51:41.627 if blo < bhi:
2025-07-01 05:51:41.636 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:41.648 else:
2025-07-01 05:51:41.656 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:41.664 elif blo < bhi:
2025-07-01 05:51:41.671 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:41.681
2025-07-01 05:51:41.688 > yield from g
2025-07-01 05:51:41.695
2025-07-01 05:51:41.703 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:41.711 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:41.717
2025-07-01 05:51:41.727 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:41.740 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:41.749 alo = 523, ahi = 1101
2025-07-01 05:51:41.757 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:41.764 blo = 523, bhi = 1101
2025-07-01 05:51:41.770
2025-07-01 05:51:41.776 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:41.782 r"""
2025-07-01 05:51:41.788 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:41.796 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:41.809 synch point, and intraline difference marking is done on the
2025-07-01 05:51:41.819 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:41.831
2025-07-01 05:51:41.840 Example:
2025-07-01 05:51:41.847
2025-07-01 05:51:41.854 >>> d = Differ()
2025-07-01 05:51:41.860 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:41.867 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:41.874 >>> print(''.join(results), end="")
2025-07-01 05:51:41.879 - abcDefghiJkl
2025-07-01 05:51:41.890 + abcdefGhijkl
2025-07-01 05:51:41.903 """
2025-07-01 05:51:41.913
2025-07-01 05:51:41.921 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:41.927 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:41.934 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:41.940 cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:41.946 eqi, eqj = None, None # 1st indices of equal lines (if any)
2025-07-01 05:51:41.951
2025-07-01 05:51:41.958 # search for the pair that matches best without being identical
2025-07-01 05:51:41.966 # (identical lines must be junk lines, & we don't want to synch up
2025-07-01 05:51:41.973 # on junk -- unless we have to)
2025-07-01 05:51:41.979 for j in range(blo, bhi):
2025-07-01 05:51:41.985 bj = b[j]
2025-07-01 05:51:41.989 cruncher.set_seq2(bj)
2025-07-01 05:51:41.994 for i in range(alo, ahi):
2025-07-01 05:51:41.998 ai = a[i]
2025-07-01 05:51:42.003 if ai == bj:
2025-07-01 05:51:42.008 if eqi is None:
2025-07-01 05:51:42.012 eqi, eqj = i, j
2025-07-01 05:51:42.023 continue
2025-07-01 05:51:42.032 cruncher.set_seq1(ai)
2025-07-01 05:51:42.038 # computing similarity is expensive, so use the quick
2025-07-01 05:51:42.044 # upper bounds first -- have seen this speed up messy
2025-07-01 05:51:42.049 # compares by a factor of 3.
2025-07-01 05:51:42.054 # note that ratio() is only expensive to compute the first
2025-07-01 05:51:42.058 # time it's called on a sequence pair; the expensive part
2025-07-01 05:51:42.064 # of the computation is cached by cruncher
2025-07-01 05:51:42.072 if cruncher.real_quick_ratio() > best_ratio and \
2025-07-01 05:51:42.079 cruncher.quick_ratio() > best_ratio and \
2025-07-01 05:51:42.086 cruncher.ratio() > best_ratio:
2025-07-01 05:51:42.093 best_ratio, best_i, best_j = cruncher.ratio(), i, j
2025-07-01 05:51:42.102 if best_ratio < cutoff:
2025-07-01 05:51:42.110 # no non-identical "pretty close" pair
2025-07-01 05:51:42.119 if eqi is None:
2025-07-01 05:51:42.129 # no identical pair either -- treat it as a straight replace
2025-07-01 05:51:42.137 yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:42.144 return
2025-07-01 05:51:42.149 # no close pair, but an identical pair -- synch up on that
2025-07-01 05:51:42.154 best_i, best_j, best_ratio = eqi, eqj, 1.0
2025-07-01 05:51:42.159 else:
2025-07-01 05:51:42.164 # there's a close pair, so forget the identical pair (if any)
2025-07-01 05:51:42.169 eqi = None
2025-07-01 05:51:42.174
2025-07-01 05:51:42.180 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
2025-07-01 05:51:42.185 # identical
2025-07-01 05:51:42.193
2025-07-01 05:51:42.200 # pump out diffs from before the synch point
2025-07-01 05:51:42.207 yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
2025-07-01 05:51:42.212
2025-07-01 05:51:42.218 # do intraline marking on the synch pair
2025-07-01 05:51:42.224 aelt, belt = a[best_i], b[best_j]
2025-07-01 05:51:42.229 if eqi is None:
2025-07-01 05:51:42.235 # pump out a '-', '?', '+', '?' quad for the synched lines
2025-07-01 05:51:42.240 atags = btags = ""
2025-07-01 05:51:42.246 cruncher.set_seqs(aelt, belt)
2025-07-01 05:51:42.252 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
2025-07-01 05:51:42.259 la, lb = ai2 - ai1, bj2 - bj1
2025-07-01 05:51:42.265 if tag == 'replace':
2025-07-01 05:51:42.273 atags += '^' * la
2025-07-01 05:51:42.286 btags += '^' * lb
2025-07-01 05:51:42.294 elif tag == 'delete':
2025-07-01 05:51:42.302 atags += '-' * la
2025-07-01 05:51:42.308 elif tag == 'insert':
2025-07-01 05:51:42.315 btags += '+' * lb
2025-07-01 05:51:42.325 elif tag == 'equal':
2025-07-01 05:51:42.334 atags += ' ' * la
2025-07-01 05:51:42.341 btags += ' ' * lb
2025-07-01 05:51:42.346 else:
2025-07-01 05:51:42.352 raise ValueError('unknown tag %r' % (tag,))
2025-07-01 05:51:42.358 yield from self._qformat(aelt, belt, atags, btags)
2025-07-01 05:51:42.364 else:
2025-07-01 05:51:42.372 # the synch pair is identical
2025-07-01 05:51:42.379 yield ' ' + aelt
2025-07-01 05:51:42.388
2025-07-01 05:51:42.396 # pump out diffs from after the synch point
2025-07-01 05:51:42.409 > yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
2025-07-01 05:51:42.421
2025-07-01 05:51:42.434 /usr/lib/python3.11/difflib.py:985:
2025-07-01 05:51:42.443 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:42.450
2025-07-01 05:51:42.455 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:42.462 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:42.476 alo = 524, ahi = 1101
2025-07-01 05:51:42.484 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:42.491 blo = 524, bhi = 1101
2025-07-01 05:51:42.498
2025-07-01 05:51:42.505 def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:42.512 g = []
2025-07-01 05:51:42.518 if alo < ahi:
2025-07-01 05:51:42.523 if blo < bhi:
2025-07-01 05:51:42.529 g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
2025-07-01 05:51:42.536 else:
2025-07-01 05:51:42.542 g = self._dump('-', a, alo, ahi)
2025-07-01 05:51:42.552 elif blo < bhi:
2025-07-01 05:51:42.562 g = self._dump('+', b, blo, bhi)
2025-07-01 05:51:42.570
2025-07-01 05:51:42.576 > yield from g
2025-07-01 05:51:42.582
2025-07-01 05:51:42.587 /usr/lib/python3.11/difflib.py:997:
2025-07-01 05:51:42.593 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:42.599
2025-07-01 05:51:42.607 self = <difflib.Differ object at [hex]>
2025-07-01 05:51:42.613 a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
2025-07-01 05:51:42.619 alo = 524, ahi = 1101
2025-07-01 05:51:42.625 b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
2025-07-01 05:51:42.631 blo = 524, bhi = 1101
2025-07-01 05:51:42.636
2025-07-01 05:51:42.642 def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
2025-07-01 05:51:42.648 r"""
2025-07-01 05:51:42.654 When replacing one block of lines with another, search the blocks
2025-07-01 05:51:42.662 for *similar* lines; the best-matching pair (if any) is used as a
2025-07-01 05:51:42.667 synch point, and intraline difference marking is done on the
2025-07-01 05:51:42.672 similar pair. Lots of work, but often worth it.
2025-07-01 05:51:42.678
2025-07-01 05:51:42.683 Example:
2025-07-01 05:51:42.689
2025-07-01 05:51:42.695 >>> d = Differ()
2025-07-01 05:51:42.703 >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
2025-07-01 05:51:42.714 ... ['abcdefGhijkl\n'], 0, 1)
2025-07-01 05:51:42.722 >>> print(''.join(results), end="")
2025-07-01 05:51:42.732 - abcDefghiJkl
2025-07-01 05:51:42.751 + abcdefGhijkl
2025-07-01 05:51:42.763 """
2025-07-01 05:51:42.768
2025-07-01 05:51:42.775 # don't synch up unless the lines have a similarity score of at
2025-07-01 05:51:42.783 # least cutoff; best_ratio tracks the best score seen so far
2025-07-01 05:51:42.794 best_ratio, cutoff = 0.74, 0.75
2025-07-01 05:51:42.802 > cruncher = SequenceMatcher(self.charjunk)
2025-07-01 05:51:42.808
2025-07-01 05:51:42.814 /usr/lib/python3.11/difflib.py:915:
2025-07-01 05:51:42.820 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:42.825
2025-07-01 05:51:42.837 self = <difflib.SequenceMatcher object at [hex]>
2025-07-01 05:51:42.844 isjunk = <function IS_CHARACTER_JUNK at 0x7f4f3e6634c0>, a = '', b = ''
2025-07-01 05:51:42.850 autojunk = True
2025-07-01 05:51:42.857
2025-07-01 05:51:42.864 def __init__(self, isjunk=None, a='', b='', autojunk=True):
2025-07-01 05:51:42.871 """Construct a SequenceMatcher.
2025-07-01 05:51:42.879
2025-07-01 05:51:42.886 Optional arg isjunk is None (the default), or a one-argument
2025-07-01 05:51:42.894 function that takes a sequence element and returns true iff the
2025-07-01 05:51:42.900 element is junk. None is equivalent to passing "lambda x: 0", i.e.
2025-07-01 05:51:42.906 no elements are considered to be junk. For example, pass
2025-07-01 05:51:42.912 lambda x: x in " \\t"
2025-07-01 05:51:42.918 if you're comparing lines as sequences of characters, and don't
2025-07-01 05:51:42.925 want to synch up on blanks or hard tabs.
2025-07-01 05:51:42.931
2025-07-01 05:51:42.939 Optional arg a is the first of two sequences to be compared. By
2025-07-01 05:51:42.947 default, an empty string. The elements of a must be hashable. See
2025-07-01 05:51:42.958 also .set_seqs() and .set_seq1().
2025-07-01 05:51:42.965
2025-07-01 05:51:42.976 Optional arg b is the second of two sequences to be compared. By
2025-07-01 05:51:42.985 default, an empty string. The elements of b must be hashable. See
2025-07-01 05:51:42.990 also .set_seqs() and .set_seq2().
2025-07-01 05:51:42.995
2025-07-01 05:51:43.000 Optional arg autojunk should be set to False to disable the
2025-07-01 05:51:43.006 "automatic junk heuristic" that treats popular elements as junk
2025-07-01 05:51:43.013 (see module documentation for more information).
2025-07-01 05:51:43.019 """
2025-07-01 05:51:43.024
2025-07-01 05:51:43.033 # Members:
2025-07-01 05:51:43.046 # a
2025-07-01 05:51:43.055 # first sequence
2025-07-01 05:51:43.062 # b
2025-07-01 05:51:43.068 # second sequence; differences are computed as "what do
2025-07-01 05:51:43.074 # we need to do to 'a' to change it into 'b'?"
2025-07-01 05:51:43.080 # b2j
2025-07-01 05:51:43.086 # for x in b, b2j[x] is a list of the indices (into b)
2025-07-01 05:51:43.092 # at which x appears; junk and popular elements do not appear
2025-07-01 05:51:43.099 # fullbcount
2025-07-01 05:51:43.106 # for x in b, fullbcount[x] == the number of times x
2025-07-01 05:51:43.112 # appears in b; only materialized if really needed (used
2025-07-01 05:51:43.119 # only for computing quick_ratio())
2025-07-01 05:51:43.124 # matching_blocks
2025-07-01 05:51:43.130 # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
2025-07-01 05:51:43.139 # ascending & non-overlapping in i and in j; terminated by
2025-07-01 05:51:43.151 # a dummy (len(a), len(b), 0) sentinel
2025-07-01 05:51:43.161 # opcodes
2025-07-01 05:51:43.169 # a list of (tag, i1, i2, j1, j2) tuples, where tag is
2025-07-01 05:51:43.176 # one of
2025-07-01 05:51:43.183 # 'replace' a[i1:i2] should be replaced by b[j1:j2]
2025-07-01 05:51:43.191 # 'delete' a[i1:i2] should be deleted
2025-07-01 05:51:43.203 # 'insert' b[j1:j2] should be inserted
2025-07-01 05:51:43.211 # 'equal' a[i1:i2] == b[j1:j2]
2025-07-01 05:51:43.219 # isjunk
2025-07-01 05:51:43.226 # a user-supplied function taking a sequence element and
2025-07-01 05:51:43.236 # returning true iff the element is "junk" -- this has
2025-07-01 05:51:43.246 # subtle but helpful effects on the algorithm, which I'll
2025-07-01 05:51:43.255 # get around to writing up someday <0.9 wink>.
2025-07-01 05:51:43.268 # DON'T USE! Only __chain_b uses this. Use "in self.bjunk".
2025-07-01 05:51:43.278 # bjunk
2025-07-01 05:51:43.287 # the items in b for which isjunk is True.
2025-07-01 05:51:43.299 # bpopular
2025-07-01 05:51:43.312 # nonjunk items in b treated as junk by the heuristic (if used).
2025-07-01 05:51:43.323
2025-07-01 05:51:43.332 self.isjunk = isjunk
2025-07-01 05:51:43.344 self.a = self.b = None
2025-07-01 05:51:43.357 self.autojunk = autojunk
2025-07-01 05:51:43.367 > self.set_seqs(a, b)
2025-07-01 05:51:43.378
2025-07-01 05:51:43.388 /usr/lib/python3.11/difflib.py:182:
2025-07-01 05:51:43.397 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:43.403
2025-07-01 05:51:43.410 self = <difflib.SequenceMatcher object at [hex]>, a = '', b = ''
2025-07-01 05:51:43.417
2025-07-01 05:51:43.423 def set_seqs(self, a, b):
2025-07-01 05:51:43.430 """Set the two sequences to be compared.
2025-07-01 05:51:43.440
2025-07-01 05:51:43.448 >>> s = SequenceMatcher()
2025-07-01 05:51:43.454 >>> s.set_seqs("abcd", "bcde")
2025-07-01 05:51:43.460 >>> s.ratio()
2025-07-01 05:51:43.466 0.75
2025-07-01 05:51:43.471 """
2025-07-01 05:51:43.476
2025-07-01 05:51:43.482 self.set_seq1(a)
2025-07-01 05:51:43.491 > self.set_seq2(b)
2025-07-01 05:51:43.500
2025-07-01 05:51:43.508 /usr/lib/python3.11/difflib.py:194:
2025-07-01 05:51:43.515 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2025-07-01 05:51:43.522
2025-07-01 05:51:43.533 self = <difflib.SequenceMatcher object at [hex]>, b = ''
2025-07-01 05:51:43.541
2025-07-01 05:51:43.548 def set_seq2(self, b):
2025-07-01 05:51:43.555 """Set the second sequence to be compared.
2025-07-01 05:51:43.561
2025-07-01 05:51:43.573 The first sequence to be compared is not changed.
2025-07-01 05:51:43.582
2025-07-01 05:51:43.590 >>> s = SequenceMatcher(None, "abcd", "bcde")
2025-07-01 05:51:43.596 >>> s.ratio()
2025-07-01 05:51:43.603 0.75
2025-07-01 05:51:43.609 >>> s.set_seq2("abcd")
2025-07-01 05:51:43.615 >>> s.ratio()
2025-07-01 05:51:43.621 1.0
2025-07-01 05:51:43.628 >>>
2025-07-01 05:51:43.634
2025-07-01 05:51:43.642 SequenceMatcher computes and caches detailed information about the
2025-07-01 05:51:43.649 second sequence, so if you want to compare one sequence S against
2025-07-01 05:51:43.655 many sequences, use .set_seq2(S) once and call .set_seq1(x)
2025-07-01 05:51:43.662 repeatedly for each of the other sequences.
2025-07-01 05:51:43.668
2025-07-01 05:51:43.680 See also set_seqs() and set_seq1().
2025-07-01 05:51:43.689 """
2025-07-01 05:51:43.697
2025-07-01 05:51:43.708 if b is self.b:
2025-07-01 05:51:43.719 return
2025-07-01 05:51:43.726 self.b = b
2025-07-01 05:51:43.733 self.matching_blocks = self.opcodes = None
2025-07-01 05:51:43.744 self.fullbcount = None
2025-07-01 05:51:43.753 > self.__chain_b()
2025-07-01 05:51:43.761 E RecursionError: maximum recursion depth exceeded
2025-07-01 05:51:43.768
2025-07-01 05:51:43.774 /usr/lib/python3.11/difflib.py:248: RecursionError
2025-07-01 05:51:43.781 ---------------------------- Captured stdout setup -----------------------------
2025-07-01 05:51:43.791 Creating db: localhost:/var/tmp/qa_2024/test_1507/test.fdb [page_size=None, sql_dialect=None, charset='NONE', user=SYSDBA, password=masterkey]
|
3 #text |
act = <firebird.qa.plugin.Action pytest object at [hex]>
@pytest.mark.version('>=3')
def test_1(act: Action):
act.expected_stdout = expected_stdout
act.execute()
> assert act.clean_stdout == act.clean_expected_stdout
tests/bugs/core_2969_test.py:1211:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ops = ('==',), results = (False,)
expls = ('%(py2)s\n{%(py2)s = %(py0)s.clean_stdout\n} == %(py6)s\n{%(py6)s = %(py4)s.clean_expected_stdout\n}',)
each_obj = ('WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy')
def _call_reprcompare(
ops: Sequence[str],
results: Sequence[bool],
expls: Sequence[str],
each_obj: Sequence[object],
) -> str:
for i, res, expl in zip(range(len(ops)), results, expls):
try:
done = not res
except Exception:
done = True
if done:
break
if util._reprcompare is not None:
> custom = util._reprcompare(ops[i], each_obj[i], each_obj[i + 1])
../lib/python3.11/site-packages/_pytest/assertion/rewrite.py:499:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
op = '=='
left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
def callbinrepr(op, left: object, right: object) -> Optional[str]:
"""Call the pytest_assertrepr_compare hook and prepare the result.
This uses the first result from the hook and then ensures the
following:
* Overly verbose explanations are truncated unless configured otherwise
(eg. if running in verbose mode).
* Embedded newlines are escaped to help util.format_explanation()
later.
* If the rewrite mode is used embedded %-characters are replaced
to protect later % formatting.
The result can be formatted by util.format_explanation() for
pretty printing.
"""
> hook_result = ihook.pytest_assertrepr_compare(
config=item.config, op=op, left=left, right=right
)
../lib/python3.11/site-packages/_pytest/assertion/__init__.py:141:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <HookCaller 'pytest_assertrepr_compare'>
kwargs = {'config': <_pytest.config.Config pytest object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY C...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
firstresult = False
def __call__(self, **kwargs: object) -> Any:
"""Call the hook.
Only accepts keyword arguments, which should match the hook
specification.
Returns the result(s) of calling all registered plugins, see
:ref:`calling`.
"""
assert (
not self.is_historic()
), "Cannot directly call a historic hook - use call_historic instead."
self._verify_all_args_are_provided(kwargs)
firstresult = self.spec.opts.get("firstresult", False) if self.spec else False
> return self._hookexec(self.name, self._hookimpls, kwargs, firstresult)
../lib/python3.11/site-packages/pluggy/_hooks.py:493:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <_pytest.config.PytestPluginManager pytest object at [hex]>
hook_name = 'pytest_assertrepr_compare'
methods = [<HookImpl plugin_name='assertion', plugin=<module '_pytest.assertion' from '/opt/distr/venv/lib/python3.11/site-packa...in_name='firebird', plugin=<module 'firebird.qa.plugin' from '/opt/distr/venv/firebird-qa/src/firebird/qa/plugin.py'>>]
kwargs = {'config': <_pytest.config.Config pytest object at [hex]>, 'left': 'WAS_OVERWRITTEN CTX_KEY C...yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'}
firstresult = False
def _hookexec(
self,
hook_name: str,
methods: Sequence[HookImpl],
kwargs: Mapping[str, object],
firstresult: bool,
) -> object | list[object]:
# called from all hookcaller instances.
# enable_tracing will set its own wrapping function at self._inner_hookexec
> return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
../lib/python3.11/site-packages/pluggy/_manager.py:115:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
config = <_pytest.config.Config pytest object at [hex]>, op = '=='
left = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1var_... yyy\n1var_997 yyy\n1var_998 yyy\n1var_999 yyy'
right = 'WAS_OVERWRITTEN CTX_KEY CTX_VAL\n=============== ============================== =======\n1 var... yyy\n1 var_997 yyy\n1 var_998 yyy\n1 var_999 yyy'
def pytest_assertrepr_compare(config: Config, op: str, left: object, right: object) -> Optional[List[str]]:
"""Returns explanation for comparisons in failing assert expressions.
If both objects are `str`, uses `difflib.ndiff` to provide explanation.
"""
if isinstance(left, str) and isinstance(right, str) and op == "==":
# 16.11.2023, pzotov: we have to put empty string at the beginning of each comparing lists.
# Otherwise first diff will be at the same line as 'assert' phrase, which causes readability be poor.
#
left_lines = ['']
left_lines.extend(left.splitlines())
right_lines = ['']
right_lines.extend(right.splitlines())
# 16.11.2023, pzotov
# ndiff output must be interpreted as following:
# * "E - <some text>" ==> MISSED line (it was in EXPECTED text but absent in actual one).
# * "E + <some_text>" ==> EXCESSIVE line (it is not in EXPECTED text but did appear in actual).
# But for QA-purposes, this output must answer the question:
# "what must be changed in ACTUAL output so that it became equal to EXPECTED"
# (i.e. how to "REVERT" actual back to expected).
# In order to see such result, we have to specify 'right_lines' to the 1st argument that is passed to ndiff().
# ::: NB :::
# We assume that all tests are written so that ACTUAL output is left side in 'assert' statement and EXPECTED
# is right side, e.g: assert act.clean_stdout == act.clean_expected_stdout
# This requirement is CRUCIAL if we use ndiff() instead of default pytest comparison method!
#
> return list(ndiff(right_lines, left_lines))
src/firebird/qa/plugin.py:608:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
def compare(self, a, b):
r"""
Compare two sequences of lines; generate the resulting delta.
Each sequence must contain individual single-line strings ending with
newlines. Such sequences can be obtained from the `readlines()` method
of file-like objects. The delta generated also consists of newline-
terminated strings, ready to be printed as-is via the writelines()
method of a file-like object.
Example:
>>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
... 'ore\ntree\nemu\n'.splitlines(True))),
... end="")
- one
+ ore
- two
- three
+ tree
+ emu
"""
cruncher = SequenceMatcher(self.linejunk, a, b)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'replace':
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
elif tag == 'delete':
g = self._dump('-', a, alo, ahi)
elif tag == 'insert':
g = self._dump('+', b, blo, bhi)
elif tag == 'equal':
g = self._dump(' ', a, alo, ahi)
else:
raise ValueError('unknown tag %r' % (tag,))
> yield from g
/usr/lib/python3.11/difflib.py:872:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 3, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 3, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 4, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 4, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 4, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 4, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 5, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 5, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 5, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 5, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 6, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 6, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 6, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 6, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 7, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 7, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 7, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 7, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 8, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 8, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 8, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 8, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 9, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 9, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 9, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 9, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 10, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 10, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 10, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 10, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 11, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 11, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 11, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 11, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 12, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 12, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 12, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 12, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 13, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 13, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 13, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 13, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 14, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 14, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 14, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 14, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 15, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 15, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 15, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 15, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 16, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 16, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 16, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 16, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 17, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 17, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 17, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 17, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 18, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 18, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 18, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 18, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 19, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 19, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 19, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 19, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 20, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 20, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 20, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 20, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 21, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 21, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 21, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 21, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 22, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 22, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 22, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 22, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 23, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 23, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 23, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 23, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 26, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 26, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 26, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 26, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 27, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 27, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 27, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 27, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 28, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 28, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 28, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 28, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 29, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 29, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 29, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 29, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 30, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 30, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 30, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 30, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 31, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 31, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 31, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 31, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 32, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 32, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 32, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 32, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 33, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 33, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 33, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 33, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 34, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 34, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 34, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 34, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 35, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 35, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 35, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 35, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 36, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 36, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 36, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 36, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 37, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 37, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 37, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 37, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 38, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 38, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 38, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 38, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 39, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 39, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 39, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 39, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 40, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 40, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 40, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 40, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 41, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 41, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 41, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 41, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 42, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 42, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 42, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 42, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 43, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 43, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 43, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 43, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 44, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 44, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 44, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 44, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 45, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 45, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 45, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 45, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 48, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 48, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 48, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 48, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 49, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 49, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 49, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 49, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 50, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 50, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 50, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 50, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 51, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 51, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 51, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 51, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 52, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 52, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 52, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 52, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 53, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 53, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 53, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 53, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 54, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 54, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 54, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 54, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 55, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 55, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 55, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 55, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 56, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 56, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 56, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 56, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 57, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 57, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 57, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 57, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 58, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 58, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 58, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 58, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 59, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 59, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 59, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 59, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 60, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 60, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 60, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 60, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 61, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 61, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 61, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 61, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 62, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 62, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 62, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 62, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 63, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 63, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 63, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 63, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 64, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 64, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 64, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 64, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 65, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 65, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 65, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 65, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 66, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 66, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 66, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 66, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 67, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 67, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 67, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 67, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 70, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 70, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 70, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 70, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 71, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 71, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 71, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 71, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 72, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 72, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 72, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 72, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 73, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 73, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 73, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 73, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 74, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 74, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 74, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 74, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 75, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 75, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 75, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 75, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 76, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 76, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 76, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 76, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 77, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 77, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 77, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 77, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 78, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 78, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 78, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 78, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 79, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 79, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 79, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 79, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 80, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 80, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 80, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 80, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 81, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 81, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 81, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 81, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 82, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 82, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 82, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 82, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 83, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 83, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 83, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 83, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 84, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 84, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 84, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 84, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 85, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 85, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 85, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 85, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 86, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 86, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 86, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 86, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 87, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 87, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 87, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 87, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 88, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 88, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 88, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 88, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 89, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 89, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 89, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 89, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 92, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 92, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 92, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 92, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 93, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 93, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 93, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 93, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 94, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 94, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 94, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 94, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 95, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 95, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 95, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 95, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 96, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 96, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 96, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 96, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 97, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 97, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 97, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 97, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 98, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 98, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 98, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 98, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 99, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 99, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 99, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 99, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 100, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 100, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 100, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 100, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 101, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 101, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 101, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 101, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 102, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 102, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 102, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 102, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 103, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 103, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 103, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 103, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 104, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 104, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 104, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 104, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 105, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 105, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 105, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 105, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 106, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 106, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 106, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 106, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 107, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 107, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 107, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 107, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 108, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 108, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 108, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 108, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 109, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 109, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 109, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 109, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 110, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 110, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 110, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 110, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 111, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 111, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 111, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 111, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 114, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 114, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 114, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 114, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 115, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 115, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 115, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 115, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 116, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 116, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 116, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 116, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 117, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 117, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 117, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 117, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 118, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 118, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 118, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 118, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 119, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 119, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 119, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 119, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 120, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 120, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 120, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 120, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 121, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 121, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 121, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 121, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 122, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 122, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 122, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 122, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 123, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 123, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 123, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 123, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 124, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 124, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 124, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 124, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 125, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 125, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 125, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 125, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 126, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 126, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 126, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 126, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 127, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 127, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 127, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 127, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 128, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 128, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 128, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 128, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 129, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 129, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 129, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 129, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 130, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 130, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 130, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 130, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 131, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 131, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 131, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 131, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 132, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 132, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 132, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 132, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 133, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 133, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 133, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 133, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 136, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 136, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 136, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 136, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 137, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 137, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 137, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 137, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 138, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 138, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 138, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 138, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 139, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 139, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 139, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 139, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 140, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 140, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 140, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 140, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 141, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 141, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 141, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 141, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 142, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 142, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 142, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 142, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 143, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 143, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 143, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 143, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 144, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 144, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 144, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 144, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 145, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 145, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 145, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 145, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 146, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 146, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 146, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 146, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 147, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 147, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 147, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 147, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 148, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 148, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 148, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 148, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 149, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 149, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 149, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 149, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 150, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 150, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 150, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 150, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 151, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 151, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 151, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 151, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 152, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 152, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 152, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 152, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 153, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 153, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 153, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 153, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 154, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 154, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 154, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 154, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 155, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 155, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 155, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 155, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 158, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 158, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 158, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 158, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 159, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 159, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 159, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 159, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 160, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 160, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 160, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 160, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 161, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 161, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 161, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 161, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 162, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 162, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 162, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 162, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 163, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 163, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 163, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 163, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 164, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 164, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 164, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 164, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 165, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 165, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 165, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 165, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 166, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 166, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 166, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 166, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 167, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 167, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 167, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 167, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 168, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 168, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 168, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 168, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 169, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 169, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 169, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 169, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 170, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 170, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 170, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 170, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 171, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 171, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 171, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 171, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 172, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 172, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 172, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 172, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 173, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 173, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 173, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 173, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 174, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 174, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 174, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 174, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 175, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 175, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 175, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 175, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 176, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 176, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 176, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 176, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 177, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 177, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 177, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 177, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 180, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 180, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 180, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 180, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 181, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 181, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 181, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 181, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 182, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 182, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 182, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 182, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 183, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 183, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 183, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 183, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 184, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 184, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 184, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 184, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 185, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 185, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 185, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 185, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 186, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 186, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 186, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 186, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 187, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 187, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 187, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 187, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 188, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 188, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 188, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 188, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 189, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 189, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 189, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 189, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 190, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 190, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 190, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 190, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 191, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 191, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 191, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 191, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 192, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 192, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 192, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 192, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 193, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 193, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 193, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 193, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 194, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 194, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 194, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 194, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 195, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 195, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 195, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 195, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 196, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 196, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 196, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 196, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 197, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 197, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 197, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 197, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 198, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 198, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 198, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 198, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 199, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 199, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 199, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 199, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 202, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 202, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 202, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 202, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 203, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 203, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 203, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 203, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 204, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 204, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 204, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 204, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 205, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 205, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 205, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 205, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 206, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 206, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 206, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 206, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 207, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 207, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 207, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 207, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 208, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 208, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 208, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 208, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 209, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 209, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 209, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 209, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 210, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 210, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 210, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 210, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 211, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 211, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 211, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 211, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 212, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 212, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 212, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 212, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 213, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 213, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 213, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 213, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 214, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 214, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 214, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 214, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 215, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 215, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 215, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 215, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 216, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 216, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 216, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 216, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 217, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 217, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 217, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 217, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 218, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 218, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 218, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 218, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 219, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 219, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 219, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 219, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 220, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 220, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 220, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 220, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 221, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 221, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 221, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 221, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 224, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 224, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 224, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 224, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 225, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 225, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 225, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 225, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 226, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 226, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 226, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 226, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 227, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 227, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 227, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 227, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 228, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 228, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 228, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 228, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 229, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 229, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 229, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 229, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 230, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 230, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 230, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 230, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 231, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 231, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 231, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 231, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 232, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 232, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 232, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 232, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 233, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 233, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 233, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 233, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 234, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 234, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 234, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 234, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 235, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 235, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 235, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 235, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 236, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 236, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 236, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 236, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 237, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 237, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 237, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 237, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 238, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 238, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 238, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 238, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 239, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 239, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 239, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 239, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 240, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 240, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 240, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 240, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 241, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 241, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 241, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 241, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 242, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 242, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 242, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 242, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 243, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 243, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 243, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 243, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 246, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 246, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 246, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 246, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 247, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 247, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 247, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 247, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 248, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 248, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 248, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 248, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 249, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 249, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 249, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 249, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 250, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 250, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 250, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 250, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 251, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 251, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 251, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 251, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 252, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 252, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 252, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 252, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 253, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 253, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 253, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 253, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 254, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 254, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 254, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 254, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 255, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 255, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 255, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 255, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 256, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 256, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 256, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 256, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 257, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 257, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 257, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 257, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 258, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 258, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 258, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 258, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 259, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 259, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 259, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 259, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 260, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 260, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 260, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 260, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 261, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 261, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 261, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 261, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 262, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 262, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 262, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 262, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 263, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 263, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 263, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 263, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 264, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 264, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 264, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 264, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 265, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 265, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 265, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 265, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 268, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 268, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 268, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 268, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 269, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 269, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 269, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 269, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 270, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 270, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 270, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 270, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 271, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 271, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 271, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 271, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 272, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 272, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 272, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 272, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 273, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 273, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 273, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 273, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 274, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 274, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 274, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 274, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 275, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 275, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 275, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 275, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 276, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 276, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 276, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 276, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 277, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 277, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 277, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 277, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 278, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 278, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 278, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 278, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 279, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 279, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 279, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 279, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 280, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 280, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 280, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 280, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 281, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 281, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 281, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 281, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 282, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 282, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 282, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 282, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 283, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 283, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 283, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 283, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 284, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 284, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 284, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 284, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 285, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 285, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 285, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 285, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 286, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 286, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 286, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 286, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 287, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 287, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 287, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 287, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 290, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 290, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 290, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 290, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 291, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 291, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 291, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 291, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 292, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 292, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 292, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 292, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 293, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 293, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 293, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 293, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 294, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 294, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 294, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 294, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 295, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 295, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 295, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 295, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 296, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 296, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 296, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 296, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 297, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 297, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 297, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 297, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 298, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 298, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 298, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 298, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 299, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 299, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 299, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 299, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 300, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 300, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 300, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 300, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 301, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 301, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 301, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 301, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 302, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 302, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 302, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 302, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 303, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 303, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 303, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 303, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 304, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 304, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 304, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 304, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 305, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 305, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 305, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 305, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 306, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 306, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 306, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 306, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 307, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 307, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 307, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 307, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 308, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 308, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 308, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 308, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 309, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 309, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 309, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 309, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 312, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 312, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 312, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 312, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 313, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 313, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 313, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 313, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 314, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 314, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 314, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 314, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 315, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 315, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 315, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 315, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 316, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 316, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 316, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 316, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 317, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 317, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 317, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 317, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 318, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 318, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 318, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 318, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 319, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 319, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 319, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 319, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 320, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 320, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 320, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 320, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 321, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 321, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 321, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 321, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 322, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 322, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 322, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 322, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 323, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 323, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 323, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 323, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 324, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 324, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 324, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 324, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 325, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 325, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 325, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 325, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 326, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 326, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 326, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 326, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 327, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 327, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 327, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 327, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 328, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 328, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 328, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 328, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 329, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 329, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 329, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 329, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 330, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 330, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 330, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 330, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 331, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 331, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 331, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 331, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 334, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 334, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 334, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 334, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 335, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 335, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 335, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 335, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 336, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 336, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 336, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 336, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 337, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 337, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 337, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 337, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 338, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 338, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 338, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 338, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 339, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 339, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 339, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 339, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 340, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 340, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 340, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 340, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 341, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 341, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 341, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 341, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 342, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 342, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 342, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 342, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 343, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 343, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 343, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 343, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 344, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 344, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 344, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 344, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 345, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 345, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 345, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 345, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 346, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 346, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 346, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 346, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 347, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 347, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 347, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 347, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 348, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 348, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 348, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 348, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 349, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 349, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 349, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 349, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 350, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 350, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 350, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 350, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 351, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 351, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 351, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 351, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 352, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 352, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 352, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 352, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 353, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 353, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 353, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 353, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 356, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 356, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 356, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 356, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 357, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 357, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 357, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 357, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 358, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 358, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 358, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 358, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 359, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 359, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 359, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 359, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 360, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 360, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 360, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 360, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 361, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 361, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 361, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 361, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 362, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 362, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 362, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 362, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 363, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 363, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 363, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 363, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 364, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 364, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 364, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 364, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 365, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 365, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 365, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 365, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 366, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 366, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 366, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 366, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 367, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 367, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 367, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 367, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 368, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 368, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 368, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 368, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 369, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 369, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 369, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 369, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 370, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 370, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 370, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 370, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 371, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 371, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 371, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 371, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 372, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 372, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 372, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 372, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 373, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 373, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 373, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 373, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 374, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 374, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 374, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 374, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 375, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 375, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 375, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 375, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 378, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 378, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 378, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 378, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 379, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 379, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 379, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 379, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 380, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 380, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 380, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 380, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 381, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 381, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 381, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 381, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 382, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 382, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 382, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 382, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 383, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 383, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 383, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 383, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 384, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 384, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 384, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 384, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 385, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 385, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 385, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 385, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 386, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 386, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 386, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 386, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 387, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 387, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 387, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 387, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 388, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 388, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 388, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 388, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 389, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 389, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 389, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 389, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 390, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 390, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 390, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 390, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 391, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 391, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 391, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 391, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 392, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 392, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 392, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 392, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 393, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 393, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 393, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 393, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 394, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 394, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 394, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 394, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 395, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 395, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 395, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 395, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 396, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 396, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 396, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 396, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 397, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 397, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 397, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 397, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 400, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 400, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 400, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 400, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 401, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 401, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 401, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 401, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 402, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 402, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 402, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 402, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 403, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 403, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 403, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 403, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 404, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 404, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 404, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 404, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 405, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 405, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 405, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 405, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 406, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 406, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 406, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 406, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 407, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 407, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 407, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 407, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 408, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 408, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 408, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 408, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 409, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 409, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 409, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 409, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 410, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 410, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 410, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 410, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 411, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 411, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 411, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 411, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 412, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 412, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 412, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 412, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 413, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 413, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 413, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 413, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 414, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 414, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 414, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 414, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 415, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 415, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 415, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 415, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 416, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 416, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 416, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 416, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 417, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 417, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 417, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 417, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 418, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 418, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 418, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 418, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 419, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 419, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 419, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 419, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 422, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 422, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 422, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 422, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 423, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 423, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 423, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 423, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 424, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 424, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 424, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 424, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 425, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 425, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 425, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 425, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 426, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 426, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 426, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 426, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 427, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 427, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 427, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 427, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 428, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 428, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 428, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 428, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 429, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 429, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 429, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 429, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 430, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 430, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 430, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 430, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 431, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 431, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 431, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 431, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 432, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 432, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 432, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 432, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 433, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 433, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 433, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 433, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 434, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 434, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 434, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 434, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 435, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 435, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 435, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 435, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 436, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 436, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 436, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 436, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 437, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 437, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 437, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 437, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 438, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 438, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 438, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 438, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 439, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 439, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 439, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 439, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 440, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 440, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 440, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 440, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 441, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 441, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 441, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 441, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 444, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 444, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 444, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 444, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 445, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 445, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 445, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 445, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 446, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 446, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 446, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 446, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 447, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 447, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 447, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 447, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 448, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 448, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 448, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 448, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 449, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 449, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 449, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 449, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 450, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 450, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 450, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 450, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 451, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 451, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 451, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 451, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 452, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 452, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 452, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 452, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 453, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 453, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 453, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 453, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 454, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 454, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 454, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 454, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 455, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 455, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 455, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 455, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 456, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 456, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 456, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 456, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 457, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 457, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 457, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 457, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 458, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 458, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 458, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 458, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 459, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 459, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 459, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 459, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 460, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 460, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 460, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 460, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 461, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 461, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 461, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 461, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 462, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 462, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 462, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 462, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 463, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 463, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 463, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 463, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 466, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 466, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 466, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 466, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 467, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 467, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 467, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 467, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 468, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 468, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 468, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 468, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 469, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 469, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 469, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 469, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 470, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 470, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 470, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 470, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 471, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 471, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 471, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 471, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 472, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 472, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 472, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 472, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 473, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 473, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 473, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 473, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 474, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 474, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 474, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 474, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 475, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 475, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 475, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 475, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 476, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 476, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 476, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 476, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 477, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 477, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 477, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 477, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 478, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 478, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 478, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 478, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 479, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 479, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 479, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 479, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 480, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 480, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 480, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 480, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 481, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 481, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 481, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 481, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 482, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 482, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 482, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 482, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 483, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 483, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 483, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 483, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 484, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 484, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 484, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 484, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 485, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 485, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 485, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 485, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 488, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 488, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 488, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 488, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 489, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 489, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 489, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 489, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 490, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 490, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 490, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 490, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 491, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 491, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 491, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 491, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 492, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 492, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 492, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 492, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 493, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 493, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 493, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 493, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 494, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 494, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 494, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 494, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 495, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 495, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 495, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 495, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 496, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 496, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 496, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 496, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 497, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 497, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 497, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 497, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 498, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 498, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 498, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 498, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 499, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 499, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 499, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 499, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 500, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 500, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 500, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 500, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 501, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 501, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 501, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 501, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 502, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 502, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 502, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 502, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 503, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 503, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 503, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 503, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 504, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 504, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 504, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 504, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 505, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 505, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 505, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 505, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 506, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 506, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 506, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 506, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 507, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 507, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 507, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 507, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 510, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 510, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 510, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 510, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 511, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 511, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 511, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 511, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 512, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 512, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 512, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 512, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 513, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 513, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 513, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 513, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 514, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 514, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 514, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 514, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 515, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 515, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 515, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 515, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 516, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 516, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 516, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 516, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 517, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 517, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 517, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 517, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 518, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 518, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 518, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 518, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 519, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 519, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 519, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 519, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 520, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 520, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 520, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 520, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 521, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 521, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 521, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 521, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 522, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 522, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 522, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 522, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 523, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 523, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 523, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 523, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(self.charjunk)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in range(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in range(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
# pump out diffs from before the synch point
yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError('unknown tag %r' % (tag,))
yield from self._qformat(aelt, belt, atags, btags)
else:
# the synch pair is identical
yield ' ' + aelt
# pump out diffs from after the synch point
> yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
/usr/lib/python3.11/difflib.py:985:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 524, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 524, bhi = 1101
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
g = []
if alo < ahi:
if blo < bhi:
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
else:
g = self._dump('-', a, alo, ahi)
elif blo < bhi:
g = self._dump('+', b, blo, bhi)
> yield from g
/usr/lib/python3.11/difflib.py:997:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.Differ pytest object at [hex]>
a = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...r_0 yyy', '1 var_1 yyy', '1 var_2 yyy', ...]
alo = 524, ahi = 1101
b = ['', 'WAS_OVERWRITTEN CTX_KEY CTX_VAL', '=============== ============================== =======...var_0 yyy', '1var_1 yyy', '1var_2 yyy', ...]
blo = 524, bhi = 1101
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
r"""
When replacing one block of lines with another, search the blocks
for *similar* lines; the best-matching pair (if any) is used as a
synch point, and intraline difference marking is done on the
similar pair. Lots of work, but often worth it.
Example:
>>> d = Differ()
>>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
... ['abcdefGhijkl\n'], 0, 1)
>>> print(''.join(results), end="")
- abcDefghiJkl
+ abcdefGhijkl
"""
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
> cruncher = SequenceMatcher(self.charjunk)
/usr/lib/python3.11/difflib.py:915:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>
isjunk = <function IS_CHARACTER_JUNK at 0x7f4f3e6634c0>, a = '', b = ''
autojunk = True
def __init__(self, isjunk=None, a='', b='', autojunk=True):
"""Construct a SequenceMatcher.
Optional arg isjunk is None (the default), or a one-argument
function that takes a sequence element and returns true iff the
element is junk. None is equivalent to passing "lambda x: 0", i.e.
no elements are considered to be junk. For example, pass
lambda x: x in " \\t"
if you're comparing lines as sequences of characters, and don't
want to synch up on blanks or hard tabs.
Optional arg a is the first of two sequences to be compared. By
default, an empty string. The elements of a must be hashable. See
also .set_seqs() and .set_seq1().
Optional arg b is the second of two sequences to be compared. By
default, an empty string. The elements of b must be hashable. See
also .set_seqs() and .set_seq2().
Optional arg autojunk should be set to False to disable the
"automatic junk heuristic" that treats popular elements as junk
(see module documentation for more information).
"""
# Members:
# a
# first sequence
# b
# second sequence; differences are computed as "what do
# we need to do to 'a' to change it into 'b'?"
# b2j
# for x in b, b2j[x] is a list of the indices (into b)
# at which x appears; junk and popular elements do not appear
# fullbcount
# for x in b, fullbcount[x] == the number of times x
# appears in b; only materialized if really needed (used
# only for computing quick_ratio())
# matching_blocks
# a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
# ascending & non-overlapping in i and in j; terminated by
# a dummy (len(a), len(b), 0) sentinel
# opcodes
# a list of (tag, i1, i2, j1, j2) tuples, where tag is
# one of
# 'replace' a[i1:i2] should be replaced by b[j1:j2]
# 'delete' a[i1:i2] should be deleted
# 'insert' b[j1:j2] should be inserted
# 'equal' a[i1:i2] == b[j1:j2]
# isjunk
# a user-supplied function taking a sequence element and
# returning true iff the element is "junk" -- this has
# subtle but helpful effects on the algorithm, which I'll
# get around to writing up someday <0.9 wink>.
# DON'T USE! Only __chain_b uses this. Use "in self.bjunk".
# bjunk
# the items in b for which isjunk is True.
# bpopular
# nonjunk items in b treated as junk by the heuristic (if used).
self.isjunk = isjunk
self.a = self.b = None
self.autojunk = autojunk
> self.set_seqs(a, b)
/usr/lib/python3.11/difflib.py:182:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>, a = '', b = ''
def set_seqs(self, a, b):
"""Set the two sequences to be compared.
>>> s = SequenceMatcher()
>>> s.set_seqs("abcd", "bcde")
>>> s.ratio()
0.75
"""
self.set_seq1(a)
> self.set_seq2(b)
/usr/lib/python3.11/difflib.py:194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <difflib.SequenceMatcher pytest object at [hex]>, b = ''
def set_seq2(self, b):
"""Set the second sequence to be compared.
The first sequence to be compared is not changed.
>>> s = SequenceMatcher(None, "abcd", "bcde")
>>> s.ratio()
0.75
>>> s.set_seq2("abcd")
>>> s.ratio()
1.0
>>>
SequenceMatcher computes and caches detailed information about the
second sequence, so if you want to compare one sequence S against
many sequences, use .set_seq2(S) once and call .set_seq1(x)
repeatedly for each of the other sequences.
See also set_seqs() and set_seq1().
"""
if b is self.b:
return
self.b = b
self.matching_blocks = self.opcodes = None
self.fullbcount = None
> self.__chain_b()
E RecursionError: maximum recursion depth exceeded
/usr/lib/python3.11/difflib.py:248: RecursionError
|